# necessary libraries
library(plyr)
## Warning: package 'plyr' was built under R version 4.3.1
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.1
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
library(tidyr)
library(stringr)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
options(repos = c(CRAN = "https://cran.rstudio.com/"))
# install.packages("fastmap")
# install.packages("skimr")
library(skimr)
## Warning: package 'skimr' was built under R version 4.3.3
library(fastDummies)
## Warning: package 'fastDummies' was built under R version 4.3.2
## Thank you for using fastDummies!
## To acknowledge our work, please cite the package:
## Kaplan, J. & Schlegel, B. (2023). fastDummies: Fast Creation of Dummy (Binary) Columns and Rows from Categorical Variables. Version 1.7.1. URL: https://github.com/jacobkap/fastDummies, https://jacobkap.github.io/fastDummies/.
library(data.table)
## Warning: package 'data.table' was built under R version 4.3.2
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:lubridate':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(readxl)
## Warning: package 'readxl' was built under R version 4.3.1
library(readr)
dataset_all <- read_excel("Final Dataset.xlsx")
head(dataset_all)
## # A tibble: 6 × 49
## Sr_No movie year production_budget domestic_gross foreign_gross
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 0 Avatar 2009 425000000 760507625 2015837654
## 2 1 Pirates of the Car… 2011 410600000 241063875 804600000
## 3 2 Avengers: Age of U… 2015 330600000 459005868 944008095
## 4 3 Avengers: Infinity… 2018 300000000 678815482 1369318718
## 5 4 Justice League 2017 300000000 229024295 426920914
## 6 5 Justice League 2017 300000000 229024295 426920914
## # ℹ 43 more variables: worldwide_gross <dbl>, month <dbl>, profit <dbl>,
## # profit_margin <chr>, roi <dbl>, pct_foreign <dbl>, match_key <chr>,
## # popularity <dbl>, release_date <chr>, original_language <chr>,
## # vote_average <dbl>, vote_count <dbl>, genre_list <chr>, genres <chr>,
## # Action <dbl>, Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## # Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, History <dbl>,
## # Horror <dbl>, Music <dbl>, Mystery <dbl>, Romance <dbl>, …
Remove Column First
# Explicitly use dplyr to remove the column
dataset_all <- dplyr::select(dataset_all, -Sr_No)
We are going to do for year 2010 to 2018
# remove all other years
# Filter the data to include only rows where year is between 2010 and 2018
data <- subset(dataset_all, year >= 2010 & year <= 2018)
# Display the filtered data
head(data)
## # A tibble: 6 × 48
## movie year production_budget domestic_gross foreign_gross worldwide_gross
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Pirates … 2011 410600000 241063875 804600000 1045663875
## 2 Avengers… 2015 330600000 459005868 944008095 1403013963
## 3 Avengers… 2018 300000000 678815482 1369318718 2048134200
## 4 Justice … 2017 300000000 229024295 426920914 655945209
## 5 Justice … 2017 300000000 229024295 426920914 655945209
## 6 Spectre 2015 300000000 200074175 679546748 879620923
## # ℹ 42 more variables: month <dbl>, profit <dbl>, profit_margin <chr>,
## # roi <dbl>, pct_foreign <dbl>, match_key <chr>, popularity <dbl>,
## # release_date <chr>, original_language <chr>, vote_average <dbl>,
## # vote_count <dbl>, genre_list <chr>, genres <chr>, Action <dbl>,
## # Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## # Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, History <dbl>,
## # Horror <dbl>, Music <dbl>, Mystery <dbl>, Romance <dbl>, …
CPI
# CPI data from 2010 to 2018, with 2018 as the base year
cpi_data <- data.frame(
year = 2010:2018,
cpi = c(218.1, 224.9, 229.6, 233, 236.7, 237, 240, 245.1, 251.1)
)
# Convert the 'year' column in 'data' to integer to match 'cpi_data'
data <- data %>%
mutate(year = as.integer(year))
# Now perform the join
data <- data %>%
left_join(cpi_data, by = "year")
# View the result
print(data)
## # A tibble: 1,702 × 49
## movie year production_budget domestic_gross foreign_gross worldwide_gross
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Pirates… 2011 410600000 241063875 804600000 1045663875
## 2 Avenger… 2015 330600000 459005868 944008095 1403013963
## 3 Avenger… 2018 300000000 678815482 1369318718 2048134200
## 4 Justice… 2017 300000000 229024295 426920914 655945209
## 5 Justice… 2017 300000000 229024295 426920914 655945209
## 6 Spectre 2015 300000000 200074175 679546748 879620923
## 7 Spectre 2015 300000000 200074175 679546748 879620923
## 8 The Dar… 2012 275000000 448139099 636300000 1084439099
## 9 Solo: A… 2018 275000000 213767512 179383835 393151347
## 10 The Lon… 2013 275000000 89302115 170700000 260002115
## # ℹ 1,692 more rows
## # ℹ 43 more variables: month <dbl>, profit <dbl>, profit_margin <chr>,
## # roi <dbl>, pct_foreign <dbl>, match_key <chr>, popularity <dbl>,
## # release_date <chr>, original_language <chr>, vote_average <dbl>,
## # vote_count <dbl>, genre_list <chr>, genres <chr>, Action <dbl>,
## # Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## # Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, …
# Set base CPI (2018 CPI value)
base_cpi <- cpi_data$cpi[cpi_data$year == 2018]
# Adjust for inflation
data <- data %>%
mutate(
production_budget_adj = production_budget * (base_cpi / cpi),
domestic_gross_adj = domestic_gross * (base_cpi / cpi),
foreign_gross_adj = foreign_gross * (base_cpi / cpi),
worldwide_gross_adj = worldwide_gross * (base_cpi / cpi),
profit_adj = worldwide_gross_adj - production_budget_adj,
roi_adj = (profit_adj / production_budget_adj) * 100
)
head(data)
## # A tibble: 6 × 55
## movie year production_budget domestic_gross foreign_gross worldwide_gross
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Pirates … 2011 410600000 241063875 804600000 1045663875
## 2 Avengers… 2015 330600000 459005868 944008095 1403013963
## 3 Avengers… 2018 300000000 678815482 1369318718 2048134200
## 4 Justice … 2017 300000000 229024295 426920914 655945209
## 5 Justice … 2017 300000000 229024295 426920914 655945209
## 6 Spectre 2015 300000000 200074175 679546748 879620923
## # ℹ 49 more variables: month <dbl>, profit <dbl>, profit_margin <chr>,
## # roi <dbl>, pct_foreign <dbl>, match_key <chr>, popularity <dbl>,
## # release_date <chr>, original_language <chr>, vote_average <dbl>,
## # vote_count <dbl>, genre_list <chr>, genres <chr>, Action <dbl>,
## # Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## # Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, History <dbl>,
## # Horror <dbl>, Music <dbl>, Mystery <dbl>, Romance <dbl>, …
Inflation adjustments have been applied to financial data (e.g., budgets, gross revenues) using the CPI with 2018 as the base year. This ensures monetary comparisons are meaningful over time.
There are few dublicate observation.
# Identify and display the duplicate rows based on 'match_key'
duplicate_data <- data[duplicated(data$match_key) | duplicated(data$match_key, fromLast = TRUE), ]
# Display the duplicate rows
print(duplicate_data)
## # A tibble: 305 × 55
## movie year production_budget domestic_gross foreign_gross worldwide_gross
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Justice… 2017 300000000 229024295 426920914 655945209
## 2 Justice… 2017 300000000 229024295 426920914 655945209
## 3 Spectre 2015 300000000 200074175 679546748 879620923
## 4 Spectre 2015 300000000 200074175 679546748 879620923
## 5 Robin H… 2010 210000000 105487148 216971858 322459006
## 6 Robin H… 2010 210000000 105487148 216971858 322459006
## 7 Robin H… 2010 210000000 105487148 216971858 322459006
## 8 Robin H… 2010 210000000 105487148 216971858 322459006
## 9 Rogue O… 2016 200000000 532177324 516925532 1049102856
## 10 Rogue O… 2016 200000000 532177324 516925532 1049102856
## # ℹ 295 more rows
## # ℹ 49 more variables: month <dbl>, profit <dbl>, profit_margin <chr>,
## # roi <dbl>, pct_foreign <dbl>, match_key <chr>, popularity <dbl>,
## # release_date <chr>, original_language <chr>, vote_average <dbl>,
## # vote_count <dbl>, genre_list <chr>, genres <chr>, Action <dbl>,
## # Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## # Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, …
# Remove the duplicate rows, keeping only the first occurrence of each 'match_key'
data <- data[!duplicated(data$match_key), ]
# Display the first few rows of the cleaned data
head(data)
## # A tibble: 6 × 55
## movie year production_budget domestic_gross foreign_gross worldwide_gross
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Pirates … 2011 410600000 241063875 804600000 1045663875
## 2 Avengers… 2015 330600000 459005868 944008095 1403013963
## 3 Avengers… 2018 300000000 678815482 1369318718 2048134200
## 4 Justice … 2017 300000000 229024295 426920914 655945209
## 5 Spectre 2015 300000000 200074175 679546748 879620923
## 6 The Dark… 2012 275000000 448139099 636300000 1084439099
## # ℹ 49 more variables: month <dbl>, profit <dbl>, profit_margin <chr>,
## # roi <dbl>, pct_foreign <dbl>, match_key <chr>, popularity <dbl>,
## # release_date <chr>, original_language <chr>, vote_average <dbl>,
## # vote_count <dbl>, genre_list <chr>, genres <chr>, Action <dbl>,
## # Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## # Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, History <dbl>,
## # Horror <dbl>, Music <dbl>, Mystery <dbl>, Romance <dbl>, …
We are removing the observations that have 0 in ‘domestic_gross’ and ‘foreign_gross’
# Identify and display the rows where both 'domestic_gross' and 'foreign_gross' are 0
zero_gross_data <- subset(data, domestic_gross == 0 & foreign_gross == 0)
# Display the observations with both 'domestic_gross' and 'foreign_gross' as 0
print(zero_gross_data)
## # A tibble: 97 × 55
## movie year production_budget domestic_gross foreign_gross worldwide_gross
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Bright 2017 90000000 0 0 0
## 2 The Rid… 2015 60000000 0 0 0
## 3 The Fac… 2015 26000000 0 0 0
## 4 Dwegons… 2014 20000000 0 0 0
## 5 Fight V… 2016 20000000 0 0 0
## 6 Bird Box 2018 19800000 0 0 0
## 7 The Pri… 2014 18000000 0 0 0
## 8 Forsaken 2016 18000000 0 0 0
## 9 Drive H… 2014 12000000 0 0 0
## 10 Dancin'… 2015 12000000 0 0 0
## # ℹ 87 more rows
## # ℹ 49 more variables: month <dbl>, profit <dbl>, profit_margin <chr>,
## # roi <dbl>, pct_foreign <dbl>, match_key <chr>, popularity <dbl>,
## # release_date <chr>, original_language <chr>, vote_average <dbl>,
## # vote_count <dbl>, genre_list <chr>, genres <chr>, Action <dbl>,
## # Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## # Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, …
# Remove those observations from the dataset
data <- subset(data, !(domestic_gross == 0 & foreign_gross == 0))
# Display the first few rows of the cleaned data
head(data)
## # A tibble: 6 × 55
## movie year production_budget domestic_gross foreign_gross worldwide_gross
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Pirates … 2011 410600000 241063875 804600000 1045663875
## 2 Avengers… 2015 330600000 459005868 944008095 1403013963
## 3 Avengers… 2018 300000000 678815482 1369318718 2048134200
## 4 Justice … 2017 300000000 229024295 426920914 655945209
## 5 Spectre 2015 300000000 200074175 679546748 879620923
## 6 The Dark… 2012 275000000 448139099 636300000 1084439099
## # ℹ 49 more variables: month <dbl>, profit <dbl>, profit_margin <chr>,
## # roi <dbl>, pct_foreign <dbl>, match_key <chr>, popularity <dbl>,
## # release_date <chr>, original_language <chr>, vote_average <dbl>,
## # vote_count <dbl>, genre_list <chr>, genres <chr>, Action <dbl>,
## # Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## # Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, History <dbl>,
## # Horror <dbl>, Music <dbl>, Mystery <dbl>, Romance <dbl>, …
Removing observation which has ZERO in ‘domestic_gross’
# Identify and display the rows where 'domestic_gross' are 0
zero_dgross <- subset(data, domestic_gross == 0)
# Display the observations with both 'domestic_gross' and 'foreign_gross' as 0
print(zero_dgross)
## # A tibble: 40 × 55
## movie year production_budget domestic_gross foreign_gross worldwide_gross
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Air Str… 2018 65000000 0 516279 516279
## 2 The Lov… 2015 35000000 0 53899 53899
## 3 Konfere… 2010 30000000 0 53048539 53048539
## 4 Acciden… 2015 26000000 0 135436 135436
## 5 Ironclad 2011 25000000 0 5297411 5297411
## 6 Zambezia 2012 20000000 0 34454336 34454336
## 7 Survivor 2015 20000000 0 1703281 1703281
## 8 The Fro… 2013 19200000 0 5617460 5617460
## 9 I Am Wr… 2016 18000000 0 309608 309608
## 10 Wolves 2014 18000000 0 94953 94953
## # ℹ 30 more rows
## # ℹ 49 more variables: month <dbl>, profit <dbl>, profit_margin <chr>,
## # roi <dbl>, pct_foreign <dbl>, match_key <chr>, popularity <dbl>,
## # release_date <chr>, original_language <chr>, vote_average <dbl>,
## # vote_count <dbl>, genre_list <chr>, genres <chr>, Action <dbl>,
## # Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## # Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, …
# Remove those observations from the dataset
data <- subset(data, !(domestic_gross == 0))
# Display the first few rows of the cleaned data
head(data)
## # A tibble: 6 × 55
## movie year production_budget domestic_gross foreign_gross worldwide_gross
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Pirates … 2011 410600000 241063875 804600000 1045663875
## 2 Avengers… 2015 330600000 459005868 944008095 1403013963
## 3 Avengers… 2018 300000000 678815482 1369318718 2048134200
## 4 Justice … 2017 300000000 229024295 426920914 655945209
## 5 Spectre 2015 300000000 200074175 679546748 879620923
## 6 The Dark… 2012 275000000 448139099 636300000 1084439099
## # ℹ 49 more variables: month <dbl>, profit <dbl>, profit_margin <chr>,
## # roi <dbl>, pct_foreign <dbl>, match_key <chr>, popularity <dbl>,
## # release_date <chr>, original_language <chr>, vote_average <dbl>,
## # vote_count <dbl>, genre_list <chr>, genres <chr>, Action <dbl>,
## # Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## # Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, History <dbl>,
## # Horror <dbl>, Music <dbl>, Mystery <dbl>, Romance <dbl>, …
Removing observation which has ZERO in ‘foreign_gross’
# Identify and display the rows where 'foreign_gross' are 0
zero_fgross <- subset(data, foreign_gross == 0)
# Display the observations with both 'foreign_gross'
print(zero_dgross)
## # A tibble: 40 × 55
## movie year production_budget domestic_gross foreign_gross worldwide_gross
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Air Str… 2018 65000000 0 516279 516279
## 2 The Lov… 2015 35000000 0 53899 53899
## 3 Konfere… 2010 30000000 0 53048539 53048539
## 4 Acciden… 2015 26000000 0 135436 135436
## 5 Ironclad 2011 25000000 0 5297411 5297411
## 6 Zambezia 2012 20000000 0 34454336 34454336
## 7 Survivor 2015 20000000 0 1703281 1703281
## 8 The Fro… 2013 19200000 0 5617460 5617460
## 9 I Am Wr… 2016 18000000 0 309608 309608
## 10 Wolves 2014 18000000 0 94953 94953
## # ℹ 30 more rows
## # ℹ 49 more variables: month <dbl>, profit <dbl>, profit_margin <chr>,
## # roi <dbl>, pct_foreign <dbl>, match_key <chr>, popularity <dbl>,
## # release_date <chr>, original_language <chr>, vote_average <dbl>,
## # vote_count <dbl>, genre_list <chr>, genres <chr>, Action <dbl>,
## # Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## # Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, …
# Remove those observations from the dataset
data <- subset(data, !(foreign_gross == 0))
# Display the first few rows of the cleaned data
head(data)
## # A tibble: 6 × 55
## movie year production_budget domestic_gross foreign_gross worldwide_gross
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Pirates … 2011 410600000 241063875 804600000 1045663875
## 2 Avengers… 2015 330600000 459005868 944008095 1403013963
## 3 Avengers… 2018 300000000 678815482 1369318718 2048134200
## 4 Justice … 2017 300000000 229024295 426920914 655945209
## 5 Spectre 2015 300000000 200074175 679546748 879620923
## 6 The Dark… 2012 275000000 448139099 636300000 1084439099
## # ℹ 49 more variables: month <dbl>, profit <dbl>, profit_margin <chr>,
## # roi <dbl>, pct_foreign <dbl>, match_key <chr>, popularity <dbl>,
## # release_date <chr>, original_language <chr>, vote_average <dbl>,
## # vote_count <dbl>, genre_list <chr>, genres <chr>, Action <dbl>,
## # Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## # Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, History <dbl>,
## # Horror <dbl>, Music <dbl>, Mystery <dbl>, Romance <dbl>, …
Removing the none observation from the dataset
# Remove rows where genres is "none"
data <- data[data$genres != "none", ]
head(data)
## # A tibble: 6 × 55
## movie year production_budget domestic_gross foreign_gross worldwide_gross
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Pirates … 2011 410600000 241063875 804600000 1045663875
## 2 Avengers… 2015 330600000 459005868 944008095 1403013963
## 3 Avengers… 2018 300000000 678815482 1369318718 2048134200
## 4 Justice … 2017 300000000 229024295 426920914 655945209
## 5 Spectre 2015 300000000 200074175 679546748 879620923
## 6 The Dark… 2012 275000000 448139099 636300000 1084439099
## # ℹ 49 more variables: month <dbl>, profit <dbl>, profit_margin <chr>,
## # roi <dbl>, pct_foreign <dbl>, match_key <chr>, popularity <dbl>,
## # release_date <chr>, original_language <chr>, vote_average <dbl>,
## # vote_count <dbl>, genre_list <chr>, genres <chr>, Action <dbl>,
## # Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## # Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, History <dbl>,
## # Horror <dbl>, Music <dbl>, Mystery <dbl>, Romance <dbl>, …
Removing the Not Rated observation from the MPAA Variable
# Remove rows where genres is "none"
data <- data[data$MPAA_Rating != "Not Rated", ]
head(data)
## # A tibble: 6 × 55
## movie year production_budget domestic_gross foreign_gross worldwide_gross
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Pirates … 2011 410600000 241063875 804600000 1045663875
## 2 Avengers… 2015 330600000 459005868 944008095 1403013963
## 3 Avengers… 2018 300000000 678815482 1369318718 2048134200
## 4 Justice … 2017 300000000 229024295 426920914 655945209
## 5 Spectre 2015 300000000 200074175 679546748 879620923
## 6 The Dark… 2012 275000000 448139099 636300000 1084439099
## # ℹ 49 more variables: month <dbl>, profit <dbl>, profit_margin <chr>,
## # roi <dbl>, pct_foreign <dbl>, match_key <chr>, popularity <dbl>,
## # release_date <chr>, original_language <chr>, vote_average <dbl>,
## # vote_count <dbl>, genre_list <chr>, genres <chr>, Action <dbl>,
## # Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## # Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, History <dbl>,
## # Horror <dbl>, Music <dbl>, Mystery <dbl>, Romance <dbl>, …
After cleaning the data we see that there is no movies in the variable named TV Movie.
# Remove the 'TV Movie' variable from the dataset
data <- data[ , !(names(data) %in% c("TV Movie"))]
# Display the first few rows of the cleaned data
head(data)
## # A tibble: 6 × 54
## movie year production_budget domestic_gross foreign_gross worldwide_gross
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Pirates … 2011 410600000 241063875 804600000 1045663875
## 2 Avengers… 2015 330600000 459005868 944008095 1403013963
## 3 Avengers… 2018 300000000 678815482 1369318718 2048134200
## 4 Justice … 2017 300000000 229024295 426920914 655945209
## 5 Spectre 2015 300000000 200074175 679546748 879620923
## 6 The Dark… 2012 275000000 448139099 636300000 1084439099
## # ℹ 48 more variables: month <dbl>, profit <dbl>, profit_margin <chr>,
## # roi <dbl>, pct_foreign <dbl>, match_key <chr>, popularity <dbl>,
## # release_date <chr>, original_language <chr>, vote_average <dbl>,
## # vote_count <dbl>, genre_list <chr>, genres <chr>, Action <dbl>,
## # Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## # Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, History <dbl>,
## # Horror <dbl>, Music <dbl>, Mystery <dbl>, Romance <dbl>, …
New variables
Seasons
# Define a function to categorize months into seasons
get_season <- function(month) {
if (month %in% c(12, 1, 2)) {
return("Winter")
} else if (month %in% c(3, 4, 5)) {
return("Spring")
} else if (month %in% c(6, 7, 8)) {
return("Summer")
} else if (month %in% c(9, 10, 11)) {
return("Fall")
}
}
# Apply the function to the 'month' column to create the 'season' column
data$Seasons <- sapply(data$month, get_season)
# Convert 'season' to a factor for analysis
data$Seasons <- as.factor(data$Seasons)
# Reorder columns: Insert 'season' right after 'month'
month_index <- which(colnames(data) == "month") # Find the index of the 'month' column
data <- data[, c(1:month_index, ncol(data), (month_index+1):(ncol(data)-1))]
# Check the first few rows to see the new 'season' column
head(data)
## # A tibble: 6 × 55
## movie year production_budget domestic_gross foreign_gross worldwide_gross
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Pirates … 2011 410600000 241063875 804600000 1045663875
## 2 Avengers… 2015 330600000 459005868 944008095 1403013963
## 3 Avengers… 2018 300000000 678815482 1369318718 2048134200
## 4 Justice … 2017 300000000 229024295 426920914 655945209
## 5 Spectre 2015 300000000 200074175 679546748 879620923
## 6 The Dark… 2012 275000000 448139099 636300000 1084439099
## # ℹ 49 more variables: month <dbl>, Seasons <fct>, profit <dbl>,
## # profit_margin <chr>, roi <dbl>, pct_foreign <dbl>, match_key <chr>,
## # popularity <dbl>, release_date <chr>, original_language <chr>,
## # vote_average <dbl>, vote_count <dbl>, genre_list <chr>, genres <chr>,
## # Action <dbl>, Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## # Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, History <dbl>,
## # Horror <dbl>, Music <dbl>, Mystery <dbl>, Romance <dbl>, …
Profit Adj Margin
# Calculate profit_adj_margin based on the adjusted profit and worldwide gross
data <- data %>%
mutate(
profit_adj_margin = profit_adj / worldwide_gross_adj)
Genre Count
# List of genre columns in the dataset
genre_columns <- c('Action', 'Adventure', 'Animation', 'Comedy', 'Crime',
'Documentary', 'Drama', 'Family', 'Fantasy', 'History',
'Horror', 'Music', 'Mystery', 'Romance', 'Science Fiction',
'Thriller', 'War', 'Western')
# Create a new column 'genre_count' which is the sum of the genre columns for each movie
data$genre_count <- rowSums(data[, genre_columns])
# Check the first few rows to see the new 'genre_count' column
head(data)
## # A tibble: 6 × 57
## movie year production_budget domestic_gross foreign_gross worldwide_gross
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Pirates … 2011 410600000 241063875 804600000 1045663875
## 2 Avengers… 2015 330600000 459005868 944008095 1403013963
## 3 Avengers… 2018 300000000 678815482 1369318718 2048134200
## 4 Justice … 2017 300000000 229024295 426920914 655945209
## 5 Spectre 2015 300000000 200074175 679546748 879620923
## 6 The Dark… 2012 275000000 448139099 636300000 1084439099
## # ℹ 51 more variables: month <dbl>, Seasons <fct>, profit <dbl>,
## # profit_margin <chr>, roi <dbl>, pct_foreign <dbl>, match_key <chr>,
## # popularity <dbl>, release_date <chr>, original_language <chr>,
## # vote_average <dbl>, vote_count <dbl>, genre_list <chr>, genres <chr>,
## # Action <dbl>, Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## # Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, History <dbl>,
## # Horror <dbl>, Music <dbl>, Mystery <dbl>, Romance <dbl>, …
Runtime Categories
# Categorize runtime into buckets
data <- data %>%
mutate(Runtime_category = case_when(
Runtime <= 90 ~ "Less than 90",
Runtime > 90 & Runtime <= 135 ~ "90 to 135",
Runtime > 135 ~ "Greater than 135"
))
# Convert to factor
data$Runtime_category <- as.factor(data$Runtime_category)
# Check if the transformation is correct
table(data$Runtime_category)
##
## 90 to 135 Greater than 135 Less than 90
## 1076 99 137
Vote Ratio
# Calculate the vote ratio and create a new column
data$vote_ratio <- data$vote_average / data$vote_count
# Find the index of the 'vote_count' column
vote_count_index <- which(colnames(data) == "vote_count")
# Reorder columns to place 'vote_ratio' right after 'vote_count'
data <- data[, c(1:vote_count_index, ncol(data), (vote_count_index+1):(ncol(data)-1))]
# Check the first few rows to see the new 'vote_ratio' column
head(data)
## # A tibble: 6 × 59
## movie year production_budget domestic_gross foreign_gross worldwide_gross
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Pirates … 2011 410600000 241063875 804600000 1045663875
## 2 Avengers… 2015 330600000 459005868 944008095 1403013963
## 3 Avengers… 2018 300000000 678815482 1369318718 2048134200
## 4 Justice … 2017 300000000 229024295 426920914 655945209
## 5 Spectre 2015 300000000 200074175 679546748 879620923
## 6 The Dark… 2012 275000000 448139099 636300000 1084439099
## # ℹ 53 more variables: month <dbl>, Seasons <fct>, profit <dbl>,
## # profit_margin <chr>, roi <dbl>, pct_foreign <dbl>, match_key <chr>,
## # popularity <dbl>, release_date <chr>, original_language <chr>,
## # vote_average <dbl>, vote_count <dbl>, vote_ratio <dbl>, genre_list <chr>,
## # genres <chr>, Action <dbl>, Adventure <dbl>, Animation <dbl>, Comedy <dbl>,
## # Crime <dbl>, Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>,
## # History <dbl>, Horror <dbl>, Music <dbl>, Mystery <dbl>, Romance <dbl>, …
Profit Categories
# Define profit categories based on profit relative to production_budget_adj
data$profit_category <- with(data, ifelse(profit_adj <= 0, "Loss",
ifelse(profit_adj <= production_budget_adj, "Break-even",
ifelse(profit_adj <= production_budget_adj * 2, "Profitable",
"Successfull"))))
# Find the index of the 'profit' column
profit_index <- which(colnames(data) == "profit")
# Reorder columns to place 'profit_category' right after 'profit'
data <- data[, c(1:profit_index, ncol(data), (profit_index+1):(ncol(data)-1))]
# View the distribution of movies in each category
table(data$profit_category)
##
## Break-even Loss Profitable Successfull
## 273 261 248 530
# Optional: Calculate the percentage of movies in each category
prop.table(table(data$profit_category)) * 100
##
## Break-even Loss Profitable Successfull
## 20.80793 19.89329 18.90244 40.39634
# Check the first few rows to verify the profit categories
head(data)
## # A tibble: 6 × 60
## movie year production_budget domestic_gross foreign_gross worldwide_gross
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Pirates … 2011 410600000 241063875 804600000 1045663875
## 2 Avengers… 2015 330600000 459005868 944008095 1403013963
## 3 Avengers… 2018 300000000 678815482 1369318718 2048134200
## 4 Justice … 2017 300000000 229024295 426920914 655945209
## 5 Spectre 2015 300000000 200074175 679546748 879620923
## 6 The Dark… 2012 275000000 448139099 636300000 1084439099
## # ℹ 54 more variables: month <dbl>, Seasons <fct>, profit <dbl>,
## # profit_category <chr>, profit_margin <chr>, roi <dbl>, pct_foreign <dbl>,
## # match_key <chr>, popularity <dbl>, release_date <chr>,
## # original_language <chr>, vote_average <dbl>, vote_count <dbl>,
## # vote_ratio <dbl>, genre_list <chr>, genres <chr>, Action <dbl>,
## # Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## # Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, …
Creating the main Genre variable
# Extract the first genre from the 'genres_list' column and create a new column 'main_genres'
data$main_genres <- sapply(strsplit(data$genre_list, ", "), `[`, 1)
# Clean up the extra characters [' and ' from the 'main_genres'
data$main_genres <- gsub("\\[|\\]|'", "", data$main_genres)
# Find the index of the 'genres' column
genres_index <- which(colnames(data) == "genres")
# Reorder columns to place 'main_genres' right after 'genres'
data <- data[, c(1:genres_index, ncol(data), (genres_index+1):(ncol(data)-1))]
# Add "Main_" prefix to each category in 'main_genres'
data$main_genres <- paste("Main_", data$main_genres, sep = "")
# Display the first few rows to check the result
head(data)
## # A tibble: 6 × 61
## movie year production_budget domestic_gross foreign_gross worldwide_gross
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Pirates … 2011 410600000 241063875 804600000 1045663875
## 2 Avengers… 2015 330600000 459005868 944008095 1403013963
## 3 Avengers… 2018 300000000 678815482 1369318718 2048134200
## 4 Justice … 2017 300000000 229024295 426920914 655945209
## 5 Spectre 2015 300000000 200074175 679546748 879620923
## 6 The Dark… 2012 275000000 448139099 636300000 1084439099
## # ℹ 55 more variables: month <dbl>, Seasons <fct>, profit <dbl>,
## # profit_category <chr>, profit_margin <chr>, roi <dbl>, pct_foreign <dbl>,
## # match_key <chr>, popularity <dbl>, release_date <chr>,
## # original_language <chr>, vote_average <dbl>, vote_count <dbl>,
## # vote_ratio <dbl>, genre_list <chr>, genres <chr>, main_genres <chr>,
## # Action <dbl>, Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## # Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, …
Giving them a binary variable
# List of genres to create binary variables for
genres <- c("Main_Action", "Main_Adventure", "Main_Animation", "Main_Comedy", "Main_Crime",
"Main_Documentary", "Main_Drama", "Main_Family", "Main_Fantasy", "Main_History",
"Main_Horror", "Main_Music", "Main_Mystery", "Main_Romance", "Main_Science Fiction",
"Main_Thriller", "Main_War", "Main_Western")
# Create binary variables for each genre
for (genre in genres) {
data[[genre]] <- ifelse(data$main_genres == genre, 1, 0)
}
data$main_genres <- as.factor(as.character(data$main_genres))
# View the dataframe
summary(data$main_genres)
## Main_Action Main_Adventure Main_Animation
## 247 86 46
## Main_Comedy Main_Crime Main_Documentary
## 239 63 10
## Main_Drama Main_Family Main_Fantasy
## 321 17 30
## Main_History Main_Horror Main_Music
## 9 76 5
## Main_Mystery Main_Romance Main_Science Fiction
## 14 32 30
## Main_Thriller Main_War Main_Western
## 73 12 2
# Define the genres to include in "Other_Genres"
other_genres <- c("Main_Music", "Main_Western")
# Create the "Other_Genres" variable
data$Other_Genres <- ifelse(data$main_genres %in% other_genres, 1, 0)
# Ensure the main_genres column is correctly formatted
head(data$main_genres)
## [1] Main_Adventure Main_Action Main_Adventure Main_Action Main_Action
## [6] Main_Action
## 18 Levels: Main_Action Main_Adventure Main_Animation Main_Comedy ... Main_Western
head(data$Other_Genres)
## [1] 0 0 0 0 0 0
table(data$Other_Genres)
##
## 0 1
## 1305 7
Groups less frequent genres (e.g., Music, Western) into a separate binary variable. Reduces sparsity in genre categories for cleaner analysis and ensures rare genres are still captured.
Data Vis
Scatter Plot: Production Budget vs. Worldwide Gross
# Scatter plot
ggplot(data, aes(x = production_budget_adj, y = worldwide_gross_adj)) +
geom_point(color = "blue", alpha = 0.6) +
labs(title = "Production Budget vs Worldwide Gross",
x = "Production Budget (USD)",
y = "Worldwide Gross (USD)") +
theme_minimal()
The scatter plot demonstrates a clear positive correlation between production budgets and worldwide gross, indicating that higher budgets generally lead to higher revenues. However, diminishing returns are observed for extremely high budgets (beyond $200 million), where revenue growth is less proportional. Notable outliers include some movies with exceptionally high profits relative to their budget, likely representing blockbusters, while others with high budgets but lower gross highlight potential losses or inefficiencies. This underscores the importance of strategic budget allocation, particularly in the mid-range budget segment, where outcomes are more variable and require further analysis by factors like genre or timing to optimize investments.
Bar Chart: Movie Profits by Year
# Bar chart of total profits per year
ggplot(data, aes(x = factor(year), y = profit_adj)) +
geom_bar(stat = "identity", fill = "steelblue") +
labs(title = "Total Movie Profits by Year",
x = "Year",
y = "Total Profit (USD)") +
theme_minimal()
The bar chart illustrates yearly total adjusted profits from 2010 to 2018, showing a consistent trend with slight fluctuations. The overall profits remain relatively stable, with notable peaks in 2014 and 2016, indicating these years were particularly profitable for the film industry. Conversely, 2011 appears to have experienced slightly lower profits compared to other years. This stability suggests a resilient industry, but the peaks highlight years that likely benefited from a higher volume of successful movies or blockbuster releases. Further analysis of genres or seasons during peak years could provide insights into what drove these trends.
Histogram: Distribution of Production Budget
# Histogram for distribution of production budgets
ggplot(data, aes(x = production_budget_adj)) +
geom_histogram(binwidth = 50000000, fill = "purple", color = "black") +
labs(title = "Distribution of Production Budgets",
x = "Production Budget (USD)",
y = "Number of Movies") +
theme_minimal()
The histogram reveals that most movies have relatively low production budgets, with the majority clustering below $50 million. A steep drop-off occurs as budgets increase, indicating fewer high-budget films. This suggests that the film industry predominantly operates in the low-to-mid budget range, while only a small fraction of movies receive substantial funding exceeding $200 million. These high-budget films are likely blockbusters or major studio productions, representing significant but rarer investments.
Density Plot: Domestic vs Foreign Gross
# Density plot for domestic and foreign gross
ggplot(data) +
geom_density(aes(x = domestic_gross_adj, fill = "Domestic"), alpha = 0.5, color = "darkblue") +
geom_density(aes(x = foreign_gross_adj, fill = "Foreign"), alpha = 0.5, color = "darkred") +
labs(title = "Density Plot of Domestic vs Foreign Gross",
x = "Gross Earnings (USD)",
y = "Density") +
theme_minimal()
The density plot compares the distribution of domestic and foreign gross earnings. Both distributions are heavily right-skewed, with the majority of movies earning less than $200 million in both markets. However, foreign gross shows a slightly wider spread, indicating that some movies perform exceptionally well internationally compared to domestic markets. This suggests the importance of targeting global audiences for maximizing revenue, especially for films with cross-cultural or international appeal.
Facet Grid: Profitability Across Seasons
library(ggplot2)
ggplot(data, aes(x = year, y = profit_adj, group = 1, color = Seasons)) +
geom_line(size = 1) +
facet_grid(. ~ Seasons) +
labs(
title = "Profitability Trends Across Seasons",
x = "Year",
y = "Profit (USD)"
) +
theme_minimal() +
theme(
legend.position = "none",
strip.text = element_text(face = "bold", size = 12)
) +
scale_color_manual(
values = c("Spring" = "darkgreen", "Summer" = "blue", "Fall" = "orange", "Winter" = "purple")
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Spring shows higher peaks in profitability compared to Summer in several years, suggesting that movies released in Spring are performing exceptionally well. Summer still shows consistent profitability across years but often trails Spring in terms of the highest profit spikes. Fall and Winter remain the least profitable seasons, with lower peaks and reduced variability. This suggests Spring might offer the most lucrative release opportunities, challenging the conventional dominance of Summer. Strategic scheduling in Spring could capitalize on this trend, especially for high-budget films.
Interactive Plot with Plotly: Production Budget vs Worldwide Gross
library(plotly)
## Warning: package 'plotly' was built under R version 4.3.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following objects are masked from 'package:plyr':
##
## arrange, mutate, rename, summarise
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Create an interactive scatter plot
p <- ggplot(data, aes(x = production_budget_adj, y = worldwide_gross_adj, text = movie)) +
geom_point(color = "blue", size = 3, alpha = 0.6) +
labs(title = "Production Budget vs Worldwide Gross",
x = "Production Budget (USD)",
y = "Worldwide Gross (USD)")
# Make it interactive with plotly
ggplotly(p)
The interactive scatter plot showcases the relationship between production budget and worldwide gross, similar to the earlier static version. The interactive format adds value by allowing exploration of specific data points, such as identifying individual movies and outliers. The trend confirms a positive correlation, where higher budgets generally yield higher worldwide grosses, but diminishing returns become apparent at extreme budgets. The ability to hover over points for details facilitates deeper insight into standout performances, such as blockbuster hits or underperforming high-budget films, making this visualization particularly useful for identifying case studies or patterns in movie success.
Interactive Plot with Plotly: Production Budget vs Profit
# Create an interactive scatter plot
q <- ggplot(data, aes(x = production_budget_adj, y = profit_adj, text = movie)) +
geom_point(color = "blue", size = 3, alpha = 0.6) +
labs(title = "Production Budget vs profit",
x = "Production Budget (USD)",
y = "profit")
# Make it interactive with plotly
ggplotly(q)
The interactive scatter plot of production budget vs. profit highlights an interesting pattern. While higher budgets generally correlate with higher profits, there is significant variability. Some high-budget movies result in substantial profits, reinforcing their blockbuster success, while others show minimal or even negative profits, suggesting inefficient budget allocation or market underperformance. Lower-budget films often exhibit a more concentrated profit range but can still achieve impressive returns. The interactive feature allows the identification of specific movies that defy the trend, providing valuable insights into successful strategies or missteps in film investments.
Interactive Plot with Plotly: World Wide Gross Adj vs IMDB Rating
# Create an interactive scatter plot
r <- ggplot(data, aes(x = worldwide_gross_adj, y = IMDb_Rating, text = movie)) +
geom_point(color = "blue", size = 3, alpha = 0.6) +
labs(title = "Worldwide Gross Adj vs IMDb Rating",
x = "Worldwide Gross Adj (USD)",
y = "IMDb Rating")
# Make it interactive with plotly
ggplotly(r)
The interactive scatter plot of worldwide gross versus IMDb rating reveals an interesting relationship. While there is a slight positive trend indicating that higher-rated movies may achieve higher worldwide gross, the correlation is not particularly strong. Many films with average IMDb ratings (6-7) perform well in terms of gross revenue, likely due to factors like marketing, franchise power, or genre popularity. Outliers with high IMDb ratings but low gross highlight critically acclaimed but less commercially successful films, while some movies achieve high gross despite average ratings, likely due to broad audience appeal or established franchises. The interactive feature aids in pinpointing specific movies driving these trends.
Bar Plot: Average ROI by Profit Category
# Bar plot: Average ROI by Profit Category
library(ggplot2)
ggplot(data, aes(x = profit_category, y = roi_adj)) +
stat_summary(fun = "mean", geom = "bar", fill = "skyblue") +
labs(title = "Average ROI by Profit Category", x = "Profit Category", y = "ROI") +
theme_minimal()
The bar plot of average ROI by profit category highlights that “Successful” movies have significantly higher average ROI compared to other categories, exceeding 600%, showcasing their exceptional financial returns. “Profitable” movies also exhibit decent ROI, while “Break-even” movies hover around zero. As expected, movies categorized as “Loss” demonstrate negative ROI. This emphasizes that targeting the “Successful” category, through careful selection of genres, budgets, and release strategies, is crucial for maximizing returns in film investments.
Box Plot: IMDb Rating by MPAA Rating
# Box plot: IMDb Rating by MPAA Rating
library(ggplot2)
ggplot(data, aes(x = MPAA_Rating, y = IMDb_Rating, fill = MPAA_Rating)) +
geom_boxplot() +
labs(
title = "IMDb Rating by MPAA Rating",
x = "MPAA Rating",
y = "IMDb Rating"
) +
theme_minimal() +
theme(
legend.position = "none",
axis.text.x = element_text(face = "bold"),
plot.title = element_text(hjust = 0.5, face = "bold")
) +
scale_fill_manual(
values = c("G" = "blue", "NC-17" = "red", "PG" = "green", "PG-13" = "purple", "R" = "orange")
)
The box plot illustrates the distribution of IMDb ratings across different MPAA ratings. “G” (General Audience) movies show a slightly narrower range of ratings, with relatively high medians, indicating consistent quality. “NC-17” has very few observations, leading to a tight range and high median, but its insights may be less generalizable. “PG,” “PG-13,” and “R” ratings display broader variability in ratings, reflecting diverse audience preferences and content. Notably, the median IMDb ratings for “PG-13” and “R” films are comparable, showing these categories often target mature and broad audiences. The chart suggests that MPAA ratings influence the perceived quality but are not sole determinants of IMDb scores.
Heatmap: Genre Count vs. Runtime
# Install necessary library for heatmap
library(ggplot2)
# Prepare data for the heatmap: Runtime vs Genre Count
ggplot(data, aes(x = genre_count, y = Runtime, fill = ..count..)) +
geom_bin2d(bins = 30) + # Use 2D binning for better heatmap
scale_fill_gradient(low = "lightblue", high = "darkblue") +
labs(title = "Heatmap: Genre Count vs Runtime",
x = "Number of Genres",
y = "Runtime (min)",
fill = "Count") +
theme_minimal()
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
The heatmap highlights the relationship between the number of genres a movie belongs to and its runtime. Most movies are concentrated around 1–2 genres with runtimes between 90–120 minutes, indicating that simpler, more focused genre classifications are common for typical feature-length films. As the number of genres increases, the distribution of runtimes becomes more varied, with longer runtimes often associated with multi-genre films. This pattern suggests that movies with more genres may require additional runtime to develop the complexity needed to appeal to broader audiences or to blend diverse storytelling elements effectively.
Heatmap: Genre Count vs. World Wide Gross Adj
# Prepare data for the heatmap: Genre Count vs World Wide Gross Adj
ggplot(data, aes(x = genre_count, y = worldwide_gross_adj, fill = ..count..)) +
geom_bin2d(bins = 30) + # Use 2D binning
scale_fill_gradient(low = "lightgreen", high = "darkgreen") +
labs(title = "Heatmap: Genre Count vs World Wide Gross Adj",
x = "Number of Genres",
y = "World Wide Gross Adj",
fill = "Count") +
theme_minimal()
The heatmap illustrates the relationship between the number of genres a movie has and its worldwide gross (adjusted). Movies with 1–2 genres dominate the dataset, with the majority grossing less than $200 million. However, movies with 3–4 genres appear more evenly distributed across higher gross ranges, suggesting that multi-genre films may appeal to broader audiences and achieve greater financial success. Films with 5 or more genres are rare but still show significant gross potential, indicating that complexity in genre blending can succeed if executed effectively. This pattern reinforces the idea that balancing genre diversity with audience targeting is key to maximizing revenue.
Heatmap: Genre Count vs. IMDb Rating
# Prepare data for the heatmap: Genre Count vs IMDb Rating
ggplot(data, aes(x = genre_count, y = IMDb_Rating, fill = ..count..)) +
geom_bin2d(bins = 30) + # Use 2D binning
scale_fill_gradient(low = "lightcoral", high = "darkred") +
labs(title = "Heatmap: Genre Count vs IMDb Rating",
x = "Number of Genres",
y = "IMDb Rating",
fill = "Count") +
theme_minimal()
The heatmap shows the relationship between the number of genres in a movie and its IMDb rating. Movies with 1–2 genres dominate the dataset and tend to have IMDb ratings clustering around 6–7, indicating that simpler genre classifications are popular and generally well-received. Movies with 3–4 genres show a broader spread in IMDb ratings, with many achieving higher ratings (above 7.5), suggesting that multi-genre films can be more critically acclaimed when executed well. However, movies with more than 4 genres are fewer in count and exhibit greater variability in ratings, reflecting either niche appeal or difficulty in maintaining consistent quality across diverse genre blends.
Lollipop Plot: IMDb Rating by Director (Top 40)
# Limit the number of directors to top 40 based on IMDb Rating
top_directors <- data %>%
arrange(desc(IMDb_Rating)) %>%
slice_head(n = 40)
# Lollipop plot: IMDb Rating by Director (Top 40)
ggplot(top_directors, aes(x = reorder(Director, IMDb_Rating), y = IMDb_Rating)) +
geom_segment(aes(xend = Director, yend = 0), color = "grey") +
geom_point(color = "steelblue", size = 4) +
coord_flip() +
labs(title = "Top 40 IMDb Ratings by Director", x = "Director", y = "IMDb Rating") +
theme_minimal(base_size = 12) + # Increase base font size for readability
theme(axis.text.y = element_text(size = 10)) # Adjust text size for director names
The lollipop plot highlights the top 40 directors ranked by their IMDb ratings.The visualization underscores the impact of directors in shaping highly-rated movies, which aligns with their established reputations for delivering quality content. This information is crucial for stakeholders considering partnerships or investments in directors for future projects.
Stacked Bar Plot: Profit Category by Main Genre
# Stacked bar plot: Profit Category by Main Genre
ggplot(data, aes(x = main_genres, fill = profit_category)) +
geom_bar(position = "fill") +
labs(title = "Profit Category Distribution Across Genres", x = "Main Genre", y = "Proportion") +
theme_minimal()
The stacked bar plot illustrates the distribution of profit categories across main genres. Certain genres, such as Animation and Adventure, have a higher proportion of “Successful” films, indicating strong profitability in these categories. Conversely, genres like Documentary and Music exhibit a greater proportion of “Loss” and “Break-even” films, suggesting higher financial risks. Action and Comedy show balanced distributions, with a mix of “Profitable” and “Successful” outcomes, reflecting their broad audience appeal. This visualization highlights which genres are safer bets for investment and which ones carry higher financial variability, providing critical insights for optimizing film investments.
Dumbbell Plot: Runtime vs IMDb Rating by MPAA Rating
# Install necessary library for dumbbell plot
library(ggalt)
## Warning: package 'ggalt' was built under R version 4.3.3
## Registered S3 methods overwritten by 'ggalt':
## method from
## grid.draw.absoluteGrob ggplot2
## grobHeight.absoluteGrob ggplot2
## grobWidth.absoluteGrob ggplot2
## grobX.absoluteGrob ggplot2
## grobY.absoluteGrob ggplot2
library(dplyr)
# Calculate average IMDb Rating and Runtime for each MPAA Rating
mpaa_summary <- data %>%
group_by(MPAA_Rating) %>%
summarise(avg_runtime = mean(Runtime, na.rm = TRUE),
avg_imdb = mean(IMDb_Rating, na.rm = TRUE))
# Dumbbell plot: Compare average Runtime and IMDb Rating by MPAA Rating
ggplot(mpaa_summary, aes(x = avg_runtime, xend = avg_imdb, y = MPAA_Rating)) +
geom_dumbbell(color = "lightblue", size = 3) +
labs(title = "Dumbbell Plot: Average Runtime vs IMDb Rating by MPAA Rating",
x = "Average Runtime (min) and IMDb Rating", y = "MPAA Rating") +
theme_minimal(base_size = 12)
## Warning: Using the `size` aesthetic with geom_segment was deprecated in ggplot2 3.4.0.
## ℹ Please use the `linewidth` aesthetic instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
The dumbbell plot provides a clearer comparison of the average runtime (left point) and IMDb rating (right point) for each MPAA rating category. The distances between the runtime and rating points are small, suggesting that runtime has little impact on IMDb ratings within these categories. Movies rated R and PG-13 maintain higher average runtimes, while those rated G and PG have shorter runtimes, reflecting their target audiences and simpler storylines. IMDb ratings, however, remain relatively uniform across all MPAA ratings, implying audience reception is less influenced by runtime or rating classification. This plot effectively highlights these relationships in a straightforward manner.
Sankey Diagram: Flow of Movies by Main Genre to Distributor (Top 10)
# Install necessary library for Sankey diagram
library(networkD3)
## Warning: package 'networkD3' was built under R version 4.3.3
library(dplyr)
# Count combinations of main_genres and Distributor
sankey_data <- as.data.frame(table(data$main_genres, data$Distributor))
# Rename the columns for clarity
colnames(sankey_data) <- c("main_genres", "Distributor", "n")
# Filter out rows where n is zero (optional, if needed)
sankey_data <- sankey_data[sankey_data$n > 0, ]
# Filter to include only the top 10 distributors to reduce clutter
top_distributors <- sankey_data %>%
group_by(Distributor) %>%
summarise(total = sum(n)) %>%
arrange(desc(total)) %>%
slice_head(n = 10)
sankey_data_filtered <- sankey_data %>%
filter(Distributor %in% top_distributors$Distributor)
# Create nodes and links for Sankey diagram
nodes <- unique(c(sankey_data_filtered$main_genres, sankey_data_filtered$Distributor))
sankey_links <- data.frame(source = match(sankey_data_filtered$main_genres, nodes) - 1,
target = match(sankey_data_filtered$Distributor, nodes) - 1,
value = sankey_data_filtered$n)
print(match(sankey_data_filtered$main_genres, nodes))
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 1 2 3 4 5 6 8 9 12 13 15
## [26] 1 2 3 4 5 6 7 11 13 2 4 5 6 12 13 1 2 4 5 6 7 8 9 10 11
## [51] 12 13 1 2 3 4 5 16 6 8 9 11 12 13 15 1 3 4 5 6 7 17 9 11 1
## [76] 2 3 4 5 6 7 8 17 9 10 11 12 13 15 1 2 3 4 6 7 8 17 9 11 12
## [101] 13 1 2 3 4 5 6 7 8 9 18 10 11 12 13 15
# Create Sankey plot with better spacing and node sizing
sankeyNetwork(Links = sankey_links, Nodes = data.frame(name = nodes),
Source = "source", Target = "target", Value = "value",
NodeID = "name", units = "Movies", fontSize = 14, nodeWidth = 50,
nodePadding = 15) # Increased node width and padding for clarity
The Sankey diagram reveals the flow of movies from their main genres to the top 10 distributors. Key players like Warner Bros. Pictures, Universal Pictures, and Walt Disney Studios Motion Pictures dominate the distribution of diverse genres, reflecting their broad market presence. Notably, Walt Disney Studios focuses primarily on Animation and Family genres, aligning with its family-friendly brand identity. On the other hand, distributors like Focus Features and Fox Searchlight Pictures cater to niche genres, indicating a targeted market approach. This visualization highlights strategic alignments between genres and distributors, providing insights for partnerships and market positioning in film investments.
Sankey Diagram: Flow of Movies by Main Genre to Production Company (Top 10 Production Companies)
# Install necessary libraries for Sankey diagram
library(networkD3)
library(dplyr)
# Count combinations of main_genres and Distributor
sankey_data_pc <- as.data.frame(table(data$main_genres, data$Production_Company))
# Rename the columns for clarity
colnames(sankey_data_pc) <- c("main_genres", "Production_Company", "n")
# Filter out rows where n is zero (optional, if needed)
sankey_data_pc <- sankey_data_pc[sankey_data_pc$n > 0, ]
# Filter to include only the top 10 production companies to reduce clutter
top_production_companies <- sankey_data_pc %>%
group_by(Production_Company) %>%
summarise(total = sum(n)) %>%
arrange(desc(total)) %>%
slice_head(n = 10)
sankey_data_pc_filtered <- sankey_data_pc %>%
filter(Production_Company %in% top_production_companies$Production_Company)
# Create nodes and links for Sankey diagram
nodes_pc <- unique(c(sankey_data_pc_filtered$main_genres, sankey_data_pc_filtered$Production_Company))
sankey_links_pc <- data.frame(source = match(sankey_data_pc_filtered$main_genres, nodes_pc) - 1,
target = match(sankey_data_pc_filtered$Production_Company, nodes_pc) - 1,
value = sankey_data_pc_filtered$n)
# Create Sankey plot for Main Genre to Production Company
sankeyNetwork(Links = sankey_links_pc, Nodes = data.frame(name = nodes_pc),
Source = "source", Target = "target", Value = "value",
NodeID = "name", units = "Movies", fontSize = 14, nodeWidth = 50,
nodePadding = 15) # Increased node width and padding for clarity
The Sankey diagram illustrates the connection between main genres and the top 10 production companies. Walt Disney Pictures strongly dominates Animation and Family genres, aligning with its family-oriented brand image. DreamWorks Pictures and Village Roadshow Pictures demonstrate versatility, engaging with multiple genres such as Action, Drama, and Comedy. Companies like Blumhouse Productions and Screen Gems show focused specialization in Horror and Thriller, reflecting their niche strategies. This visualization highlights the production companies’ genre preferences, offering insights into which companies to approach for collaborations based on genre alignment.
Sankey Diagram: Flow of Movies by Main Genre to Profit Category
# Install necessary libraries for Sankey diagram
library(networkD3)
library(dplyr)
# Count combinations of main_genres and Distributor
sankey_data_profit <- as.data.frame(table(data$main_genres, data$profit_category))
# Rename the columns for clarity
colnames(sankey_data_profit) <- c("main_genres", "profit_category", "n")
# Filter out rows where n is zero (optional, if needed)
sankey_data_profit <- sankey_data_profit[sankey_data_profit$n > 0, ]
# Filter to include only the top 10 profit categories to reduce clutter
top_profit_categories <- sankey_data_profit %>%
group_by(profit_category) %>%
summarise(total = sum(n)) %>%
arrange(desc(total)) %>%
slice_head(n = 10)
sankey_data_profit_filtered <- sankey_data_profit %>%
filter(profit_category %in% top_profit_categories$profit_category)
# Create nodes and links for Sankey diagram
nodes_profit <- unique(c(sankey_data_profit_filtered$main_genres, sankey_data_profit_filtered$profit_category))
sankey_links_profit <- data.frame(source = match(sankey_data_profit_filtered$main_genres, nodes_profit) - 1,
target = match(sankey_data_profit_filtered$profit_category, nodes_profit) - 1,
value = sankey_data_profit_filtered$n)
# Create Sankey plot for Main Genre to Profit Category
sankeyNetwork(Links = sankey_links_profit, Nodes = data.frame(name = nodes_profit),
Source = "source", Target = "target", Value = "value",
NodeID = "name", units = "Movies", fontSize = 14, nodeWidth = 50,
nodePadding = 15) # Increased node width and padding for clarity
The Sankey diagram illustrates the flow of movies from their main genres to profit categories. Genres like Animation, Adventure, and Family show a stronger connection to the “Successful” and “Profitable” categories, reflecting their ability to generate higher returns. Conversely, genres such as Documentary, History, and Mystery are more associated with “Loss” and “Break-even,” indicating greater financial risks. This visualization highlights which genres consistently drive profitability and which ones tend to underperform, providing valuable insights for optimizing investments based on genre-specific profit trends.
Density Ridgeline Plot: IMDb Rating by Main Genre
library(ggridges)
## Warning: package 'ggridges' was built under R version 4.3.2
# Ridgeline plot: IMDb Rating by Main Genre
ggplot(data, aes(x = IMDb_Rating, y = main_genres, fill = main_genres)) +
geom_density_ridges(scale = 3, rel_min_height = 0.01) +
labs(title = "Density Ridgeline Plot: IMDb Rating by Main Genre",
x = "IMDb Rating", y = "Main Genre") +
theme_ridges() +
theme(legend.position = "none")
## Picking joint bandwidth of 0.355
The density ridgeline plot provides a comparison of IMDb ratings across different main genres. It shows the distribution and concentration of IMDb ratings for each genre, highlighting patterns of audience reception. For instance, genres such as Drama, Adventure, and Animation have a strong central tendency towards higher ratings (around 7-8), reflecting consistent audience approval. On the other hand, Documentary and Horror genres display wider variations, indicating diverse audience reception. This plot helps identify which genres generally achieve higher critical acclaim, aiding in strategic decisions about genre focus.
Analyzing Genre Preferences and Profitability
# Load necessary libraries
library(dplyr)
library(ggplot2)
# Calculate average ROI and audience satisfaction by genre
genre_roi <- data %>%
group_by(main_genres) %>%
summarize(
avg_roi = mean(roi_adj, na.rm = TRUE),
avg_rating = mean(IMDb_Rating, na.rm = TRUE)
)
# Plot ROI by genre
ggplot(genre_roi, aes(x = reorder(main_genres, avg_roi), y = avg_roi)) +
geom_bar(stat = "identity", fill = "blue") +
coord_flip() +
labs(title = "Average ROI by Genre", x = "Genre", y = "Average ROI")
# Plot audience satisfaction by genre
ggplot(genre_roi, aes(x = reorder(main_genres, avg_rating), y = avg_rating)) +
geom_bar(stat = "identity", fill = "green") +
coord_flip() +
labs(title = "Average Audience Satisfaction by Genre", x = "Genre", y = "Average Rating")
Average ROI by Genre: The blue bar chart highlights that genres such as Horror, Mystery, and Thriller tend to achieve the highest returns on investment (ROI). This suggests these genres are cost-efficient and capable of generating significant profits compared to their production budgets, possibly due to lower production costs or niche audience appeal.
Average Audience Satisfaction by Genre: The green chart reveals that genres such as Western, History, and Animation receive the highest average IMDb ratings, indicating strong audience satisfaction. However, genres like Horror and Mystery, despite high ROIs, have lower average ratings, showing a potential gap between profitability and viewer approval.
Evaluating Budget and Its Impact on ROI
# Relationship between budget and ROI
ggplot(data, aes(x = production_budget_adj, y = roi_adj)) +
geom_point() +
geom_smooth(method = "lm", col = "red") +
labs(title = "Production Budget vs. ROI", x = "Production Budget", y = "ROI")
## `geom_smooth()` using formula = 'y ~ x'
# Group movies by budget categories (low, medium, high)
data <- data %>%
mutate(budget_category = case_when(
production_budget_adj < 25000000 ~ "Low",
production_budget_adj >= 25000000 & production_budget_adj < 75000000 ~ "Medium",
TRUE ~ "High"
))
# Calculate ROI and profit margin by budget category
budget_roi <- data %>%
group_by(budget_category) %>%
summarize(
avg_roi = mean(roi_adj, na.rm = TRUE),
avg_profit_adj_margin = mean(profit_adj_margin, na.rm = TRUE)
)
# Plot ROI by budget category
ggplot(budget_roi, aes(x = budget_category, y = avg_roi)) +
geom_bar(stat = "identity", fill = "blue") +
labs(title = "Average ROI by Budget Category", x = "Budget Category", y = "Average ROI")
Scatter Plot (Production Budget vs. ROI): Movies with lower production budgets tend to show a much higher variation in ROI, with some extremely high outliers. The trend suggests a diminishing ROI as production budgets increase, indicating that lower-budget films might achieve greater profitability proportional to their cost.
Bar Chart (Average ROI by Budget Category): Low-budget movies exhibit the highest average ROI, far outperforming medium and high-budget movies. This could be due to their lower costs making even modest profits appear substantial in percentage terms. Medium and high-budget movies show relatively stable and comparable ROIs, though lower than low-budget films.
Optimizing Release Timing
# Analyze seasonality effect on profitability
season_roi <- data %>%
group_by(Seasons) %>%
summarize(
avg_profit = mean(profit_adj, na.rm = TRUE),
avg_worldwide_gross_adj = mean(worldwide_gross_adj, na.rm = TRUE)
)
# Plot average profit by season
ggplot(season_roi, aes(x = reorder(Seasons, avg_profit), y = avg_profit)) +
geom_bar(stat = "identity", fill = "purple") +
labs(title = "Average Profit by Season", x = "Season", y = "Average Profit")
# Plot average ROI by season
ggplot(season_roi, aes(x = reorder(Seasons, avg_worldwide_gross_adj), y = avg_worldwide_gross_adj)) +
geom_bar(stat = "identity", fill = "orange") +
labs(title = "Average Worldwide Gross Adj by Season", x = "Season", y = "Average Worldwide Gross Adj")
The first plot, “Average Profit by Season,” indicates a clear seasonal trend in profits. Summer and Spring seasons emerge as the most profitable periods, with Summer leading slightly. Winter follows, with Fall trailing as the least profitable season. This pattern suggests that movies released during the Spring and Summer seasons benefit from higher profitability, likely due to increased audience turnout during holidays and favorable weather conditions.
The second plot, “Average Worldwide Gross Adj by Season,” aligns closely with the profit analysis. It shows that Spring and Summer seasons not only yield higher profits but also generate significantly higher worldwide gross earnings. This consistency underscores the importance of timing movie releases to maximize box office revenue. In contrast, Fall, with the lowest average gross and profit, may reflect reduced audience engagement during this season.
Analyzing Target Audience and MPAA Rating Impact
# Audience satisfaction and MPAA rating
rating_satisfaction <- data %>%
group_by(MPAA_Rating) %>%
summarize(
avg_rating = mean(Critic_score, na.rm = TRUE),
avg_worldwide_gross_adj = mean(worldwide_gross_adj, na.rm = TRUE)
)
# Plot audience satisfaction by MPAA rating
ggplot(rating_satisfaction, aes(x = reorder(MPAA_Rating, avg_rating), y = avg_rating)) +
geom_bar(stat = "identity", fill = "green") +
labs(title = "Audience Satisfaction by MPAA Rating", x = "MPAA Rating", y = "Average Rating")
# Plot ROI by MPAA rating
ggplot(rating_satisfaction, aes(x = reorder(MPAA_Rating, avg_worldwide_gross_adj), y = avg_worldwide_gross_adj)) +
geom_bar(stat = "identity", fill = "blue") +
labs(title = "Worldwide Gross Adj by MPAA Rating", x = "MPAA Rating", y = "Average Worldwide Gross Adj")
The analysis of MPAA ratings provides valuable insights into how movie ratings influence audience satisfaction and revenue generation. The first bar chart, displaying “Audience Satisfaction by MPAA Rating,” highlights that NC-17 rated movies achieve the highest average critic scores. However, movies rated PG and G also perform well in terms of audience satisfaction. This suggests that while NC-17 movies may cater to a niche audience with high approval, family-friendly ratings such as PG and G enjoy broader acceptance.
The second chart, “Worldwide Gross Adj by MPAA Rating,” reveals a contrasting trend. Movies with a G rating generate the highest worldwide gross, indicating their appeal to a universal audience, often including children and families. PG-rated movies follow closely, further solidifying the dominance of family-oriented content in terms of financial performance. Meanwhile, NC-17 movies, despite their high ratings, have minimal financial returns, likely due to limited audience reach and distribution challenges. This underscores the importance of balancing creative vision with market accessibility when deciding on an MPAA rating for a film.
Exploring the Relationship Between Runtime and Profitability
# Relationship between Runtime and ROI
ggplot(data, aes(x = Runtime, y = worldwide_gross_adj)) +
geom_point() +
geom_smooth(method = "lm", col = "blue") +
labs(title = "Runtime vs. worldwide Gross Adj", x = "Runtime (min)", y = "Worldwide Gross Adj")
## `geom_smooth()` using formula = 'y ~ x'
# Relationship between Runtime and Audience Rating
ggplot(data, aes(x = Runtime, y = IMDb_Rating)) +
geom_point() +
geom_smooth(method = "lm", col = "green") +
labs(title = "Runtime vs. IMDb Rating", x = "Runtime (min)", y = "Audience Rating")
## `geom_smooth()` using formula = 'y ~ x'
library(ggplot2)
library(plotly)
# Create scatter plot with text and regression line for Runtime vs IMDb Rating
a <- ggplot(data, aes(x = Runtime, y = IMDb_Rating, text = movie)) +
geom_point(color = "blue", size = 2, alpha = 0.7) + # Scatter points
geom_smooth(method = "lm", col = "green", se = TRUE) + # Linear regression line
labs(title = "Runtime vs. IMDb Rating",
x = "Runtime (min)",
y = "Audience Rating") +
theme_minimal()
# Convert the ggplot object to an interactive plot using plotly
ggplotly(a)
## `geom_smooth()` using formula = 'y ~ x'
Runtime vs. Worldwide Gross Adjusted: The scatter plot indicates a positive linear trend, suggesting that longer movies tend to generate higher worldwide gross revenue (adjusted). However, the data also demonstrates a significant variance, particularly for shorter runtimes, where some movies achieve high revenues despite shorter durations.
Runtime vs. IMDb Rating: This scatter plot, complemented by a regression line, highlights a moderate positive correlation between runtime and audience rating. Longer movies slightly correlate with better ratings, although exceptions exist. Notably, the density of points for mid-length runtimes (around 100-120 minutes) suggests they are most common and generally well-received.
Interactive Runtime vs. IMDb Rating: The interactive version of the second plot enhances user engagement by allowing movie-specific exploration. This feature helps identify outliers or specific movies with unusually high or low ratings relative to their runtimes.
Genre Combinations and Their Effect on Profitability
# Number of genres associated with each movie and its impact on ROI
ggplot(data, aes(x = genre_count, y = worldwide_gross_adj)) +
geom_point() +
geom_smooth(method = "lm", col = "red") +
labs(title = "Number of Genres vs. Worldwide Gross Adj", x = "Number of Genres", y = "Worldwide Gross Adj")
## `geom_smooth()` using formula = 'y ~ x'
# Cross-tabulation of main genres and Worldwide Gross Adj
genre_worldgross <- data %>%
group_by(main_genres) %>%
summarize(avg_worldwide_gross_adj = mean(worldwide_gross_adj, na.rm = TRUE))
The visualization illustrates the relationship between the number of genres in a movie and its adjusted worldwide gross. While the linear regression line suggests a slight positive trend, the variation within each genre count highlights that profitability is not solely dictated by the number of genres but also by other factors such as quality, audience appeal, and marketing. This indicates that while combining genres might attract a broader audience, successful execution remains critical. Further analysis of average worldwide gross by specific genres could provide deeper insights into which genres consistently drive higher profitability, regardless of the number of genres involved.
Investigating Director and Cast Influence
# Analyzing Director's impact on ROI
director_worldgross <- data %>%
group_by(Director) %>%
summarize(
avg_worldwide_gross_adj = mean(worldwide_gross_adj, na.rm = TRUE),
avg_rating = mean(IMDb_Rating, na.rm = TRUE)
)
# Top 10 Directors by ROI
top_directors <- director_worldgross %>%
arrange(desc(avg_worldwide_gross_adj)) %>%
head(25)
# Plotting Top 10 Directors by ROI
ggplot(top_directors, aes(x = reorder(Director, avg_worldwide_gross_adj), y = avg_worldwide_gross_adj)) +
geom_bar(stat = "identity", fill = "blue") +
coord_flip() +
labs(title = "Top 10 Directors by Worldwide Gross Adj", x = "Director", y = "Average Worldwide Gross Adj")
The chart highlights the top directors ranked by their average worldwide gross adjusted, showcasing their significant influence on movie profitability. These results underline the pivotal role a director plays in a film’s financial success, as their vision, storytelling, and ability to assemble talented teams directly impact a movie’s market appeal. Additionally, the diversity in the list reflects a variety of genres and collaborative styles that can lead to financial triumph in the global market.
Analyzing Critic vs. Audience Reception
# Relationship between critic score and audience rating
ggplot(data, aes(x = Critic_score, y = IMDb_Rating)) +
geom_point() +
geom_smooth(method = "lm", col = "purple") +
labs(title = "Critic Score vs. Audience Rating", x = "Critic Score", y = "Audience Rating")
## `geom_smooth()` using formula = 'y ~ x'
The scatterplot illustrates the relationship between critic scores and audience ratings, revealing a positive correlation. As critic scores increase, there is a noticeable upward trend in audience ratings, indicating that movies with higher critic approval tend to resonate well with audiences. This alignment suggests that critical acclaim often translates into audience appreciation, emphasizing the role of critics in shaping viewer perceptions and the potential market reception of a film.
Calculating Risk (Variance) in worldwide gross adjusted
# Variance in Worldwide Gross Adj by Genre
genre_risk <- data %>%
group_by(main_genres) %>%
summarize(worldwide_gross_adj_variance = var(worldwide_gross_adj, na.rm = TRUE))
# Plot Variance in Worldwide Gross Adj by Genre
ggplot(genre_risk, aes(x = reorder(main_genres, worldwide_gross_adj_variance), y = worldwide_gross_adj_variance)) +
geom_bar(stat = "identity", fill = "red") +
coord_flip() +
labs(title = "Variance in Worldwide Gross Adj by Genre", x = "Genre", y = "Variance in Worldwide Gross Adj")
The bar chart illustrates the variance in worldwide gross adjusted revenue across different movie genres, offering insight into the financial risks associated with each genre. Genres like “Family” and “Science Fiction” exhibit the highest variance, suggesting significant inconsistency in revenue outcomes—likely influenced by a mix of high-grossing blockbusters and underperforming titles within these categories. In contrast, genres such as “Documentary” and “Music” display minimal variance, indicating more stable but lower revenue performance. These patterns emphasize the trade-off between potential high returns and financial risk when investing in genres with high revenue variability.
Comparing Profitability by MPAA Rating Over Time
# Grouping by MPAA Rating and Year to see trends over time
mpaa_yearly_roi <- data %>%
group_by(year, MPAA_Rating) %>%
summarize(avg_roi = mean(roi_adj, na.rm = TRUE))
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
# Plotting MPAA Rating Trends over time
ggplot(mpaa_yearly_roi, aes(x = year, y = avg_roi, color = MPAA_Rating)) +
geom_line() +
labs(title = "MPAA Rating ROI Trends Over Time", x = "Year", y = "Average ROI")
The line chart illustrates the return on investment (ROI) trends over time across different MPAA ratings, providing insight into how profitability varies for movies with different content classifications. “R-rated” movies display significant fluctuations, peaking in certain years, which could be tied to the success of specific high-grossing films. On the other hand, “G” and “PG” rated films maintain relatively steady, albeit lower, ROI levels, reflecting consistent but modest performance. The “PG-13” category, representing a broad audience appeal, shows stable trends with occasional increases, suggesting its reliability as a profitable rating. Such patterns emphasize how audience reach and movie content restrictions interplay to influence financial success.
Production Budget Dynamics
# Visualizing the distribution of production budgets
library(ggplot2)
ggplot(data, aes(x = production_budget_adj)) +
geom_histogram(bins = 30, fill = "blue", alpha = 0.7) +
labs(title = "Distribution of Production Budgets", x = "Production Budget", y = "Count")
# Check correlation between numerical variables and production budget
cor(data[,c("production_budget_adj", "Runtime", "IMDb_Rating")], use = "complete.obs")
## production_budget_adj Runtime IMDb_Rating
## production_budget_adj 1.0000000 0.3658046 0.1363438
## Runtime 0.3658046 1.0000000 0.4030555
## IMDb_Rating 0.1363438 0.4030555 1.0000000
# Boxplot of production budget by genre
ggplot(data, aes(x = main_genres, y = production_budget_adj)) +
geom_boxplot(fill = "lightblue") +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(title = "Production Budget by Genre", x = "Main Genre", y = "Production Budget")
The histogram showcasing the distribution of production budgets reveals a right-skewed pattern, with the majority of films clustering in the lower budget range, typically under $50 million. This indicates that most films are produced with modest budgets, likely catering to niche audiences or focusing on smaller-scale storytelling. However, the presence of a long tail highlights a smaller but significant number of high-budget productions, reflecting the blockbuster trend in genres like Action and Science Fiction, where budgets can exceed hundreds of millions to accommodate expansive visual effects and global marketing campaigns.
The boxplot examining production budgets by genre emphasizes the financial disparities across different categories. High-budget genres like Action, Adventure, and Science Fiction stand out with median budgets significantly above other genres, driven by their reliance on cutting-edge technology, extensive special effects, and high-profile cast. On the other hand, genres like Documentary, Comedy, and Horror demonstrate lower budget medians, suggesting they can achieve success through minimalistic setups, character-driven plots, and smaller production teams. These insights indicate that genre choice is a pivotal factor in determining the scale of financial investment required for a film.
Outliers
summary(data$worldwide_gross_adj)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.896e+04 2.145e+07 7.299e+07 1.689e+08 1.934e+08 2.048e+09
# Calculate Q1, Q3, and IQR
Q1 <- quantile(data$worldwide_gross_adj, 0.25, na.rm = TRUE)
Q3 <- quantile(data$worldwide_gross_adj, 0.75, na.rm = TRUE)
IQR <- Q3 - Q1
# Define lower and upper bounds for outliers
lower_bound <- Q1 - 1.5 * IQR
upper_bound <- Q3 + 1.5 * IQR
# Count the number of outliers
num_outliers <- sum(data$worldwide_gross_adj < lower_bound | data$worldwide_gross_adj > upper_bound, na.rm = TRUE)
# Print the number of outliers
cat("Number of outliers in worldwide_gross_adj:", num_outliers, "\n")
## Number of outliers in worldwide_gross_adj: 128
# Filter data to remove outliers
data <- data %>%
filter(worldwide_gross_adj >= lower_bound & worldwide_gross_adj <= upper_bound)
# Verify removal of outliers
summary(data$worldwide_gross_adj)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 28956 18269598 60446550 98272452 141985106 448808221
Outliers often represent anomalies such as exceptional blockbusters or poorly performing movies. Removing these extremes helps in ensuring that further analysis reflects the general trends and avoids being skewed by a few extreme cases. However, this decision should be balanced based on the goals of your analysis—outliers can also provide valuable insights into exceptional cases. If the focus is on the typical behavior of movies, outlier removal is beneficial. For analysis of blockbuster success patterns, those outliers might warrant separate study.
summary(data$main_genres)
## Main_Action Main_Adventure Main_Animation
## 196 60 28
## Main_Comedy Main_Crime Main_Documentary
## 236 63 10
## Main_Drama Main_Family Main_Fantasy
## 315 12 26
## Main_History Main_Horror Main_Music
## 9 75 5
## Main_Mystery Main_Romance Main_Science Fiction
## 14 31 21
## Main_Thriller Main_War Main_Western
## 72 10 1
Performing log to worldwide gross
# Apply log transformation to the scaled variable and create a new column
data$Log_worldwide_gross_adj <- log(data$worldwide_gross_adj)
# View the first few rows of the dataframe
head(data)
## # A tibble: 6 × 82
## movie year production_budget domestic_gross foreign_gross worldwide_gross
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Solo: A … 2018 275000000 213767512 179383835 393151347
## 2 The Lone… 2013 275000000 89302115 170700000 260002115
## 3 John Car… 2012 275000000 73058679 209719421 282778100
## 4 Battlesh… 2012 220000000 65233400 248244317 313477717
## 5 Robin Ho… 2010 210000000 105487148 216971858 322459006
## 6 Green La… 2011 200000000 116601172 102934320 219535492
## # ℹ 76 more variables: month <dbl>, Seasons <fct>, profit <dbl>,
## # profit_category <chr>, profit_margin <chr>, roi <dbl>, pct_foreign <dbl>,
## # match_key <chr>, popularity <dbl>, release_date <chr>,
## # original_language <chr>, vote_average <dbl>, vote_count <dbl>,
## # vote_ratio <dbl>, genre_list <chr>, genres <chr>, main_genres <fct>,
## # Action <dbl>, Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## # Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, …
# Summary of the log-transformed variable
summary(data$Log_worldwide_gross_adj)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 10.27 16.72 17.92 17.56 18.77 19.92
We are converting the World wide Gross Adj in the Log World wide Gross adj so that the range become less in dependent variable.
Creating the dummies variables for Runtime catagory
# Define the runtime categories (excluding the reference category)
time_cat <- c("90 to 135", "Greater than 135")
# Loop through each runtime category to create binary variables
for (category in time_cat) {
data[[category]] <- ifelse(data$Runtime_category == category, 1, 0)
}
# "Greater than 135" is automatically the reference category
# View the first few rows of the dataframe
head(data)
## # A tibble: 6 × 84
## movie year production_budget domestic_gross foreign_gross worldwide_gross
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Solo: A … 2018 275000000 213767512 179383835 393151347
## 2 The Lone… 2013 275000000 89302115 170700000 260002115
## 3 John Car… 2012 275000000 73058679 209719421 282778100
## 4 Battlesh… 2012 220000000 65233400 248244317 313477717
## 5 Robin Ho… 2010 210000000 105487148 216971858 322459006
## 6 Green La… 2011 200000000 116601172 102934320 219535492
## # ℹ 78 more variables: month <dbl>, Seasons <fct>, profit <dbl>,
## # profit_category <chr>, profit_margin <chr>, roi <dbl>, pct_foreign <dbl>,
## # match_key <chr>, popularity <dbl>, release_date <chr>,
## # original_language <chr>, vote_average <dbl>, vote_count <dbl>,
## # vote_ratio <dbl>, genre_list <chr>, genres <chr>, main_genres <fct>,
## # Action <dbl>, Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## # Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, …
Created the dummies variables for the for the Runtime catagories So we can get the proper results when we drive more deep in the analysis part.
Creating the dummies variables for Seasons
# List of seasons to create binary variables for
Seasons <- c("Spring", "Summer", "Fall")
# Create binary variables for each season
for (season in Seasons) {
data[[season]] <- ifelse(data$Seasons == season, 1, 0)
}
# View the first few rows of the dataframe
head(data)
## # A tibble: 6 × 87
## movie year production_budget domestic_gross foreign_gross worldwide_gross
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Solo: A … 2018 275000000 213767512 179383835 393151347
## 2 The Lone… 2013 275000000 89302115 170700000 260002115
## 3 John Car… 2012 275000000 73058679 209719421 282778100
## 4 Battlesh… 2012 220000000 65233400 248244317 313477717
## 5 Robin Ho… 2010 210000000 105487148 216971858 322459006
## 6 Green La… 2011 200000000 116601172 102934320 219535492
## # ℹ 81 more variables: month <dbl>, Seasons <fct>, profit <dbl>,
## # profit_category <chr>, profit_margin <chr>, roi <dbl>, pct_foreign <dbl>,
## # match_key <chr>, popularity <dbl>, release_date <chr>,
## # original_language <chr>, vote_average <dbl>, vote_count <dbl>,
## # vote_ratio <dbl>, genre_list <chr>, genres <chr>, main_genres <fct>,
## # Action <dbl>, Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## # Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, …
Created the dummies variables for the for the Seasons catagories So we can get the proper results when we drive more deep in the analysis part.
Creating the dummies variables for MPAA Rating
# List of seasons to create binary variables for
ratings <- c("PG-13" ,"R","PG" ,"G")
# Create binary variables for each season
for (rating in ratings) {
data[[rating]] <- ifelse(data$MPAA_Rating == rating, 1, 0)
}
# View the first few rows of the dataframe
head(data)
## # A tibble: 6 × 91
## movie year production_budget domestic_gross foreign_gross worldwide_gross
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Solo: A … 2018 275000000 213767512 179383835 393151347
## 2 The Lone… 2013 275000000 89302115 170700000 260002115
## 3 John Car… 2012 275000000 73058679 209719421 282778100
## 4 Battlesh… 2012 220000000 65233400 248244317 313477717
## 5 Robin Ho… 2010 210000000 105487148 216971858 322459006
## 6 Green La… 2011 200000000 116601172 102934320 219535492
## # ℹ 85 more variables: month <dbl>, Seasons <fct>, profit <dbl>,
## # profit_category <chr>, profit_margin <chr>, roi <dbl>, pct_foreign <dbl>,
## # match_key <chr>, popularity <dbl>, release_date <chr>,
## # original_language <chr>, vote_average <dbl>, vote_count <dbl>,
## # vote_ratio <dbl>, genre_list <chr>, genres <chr>, main_genres <fct>,
## # Action <dbl>, Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## # Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, …
Created the dummies variables for the for the MPAA Rating catagories So we can get the proper results when we drive more deep in the analysis part.
Creating the dummies variables for Genres count
# List of seasons to create binary variables for
Counts <- c("1" ,"2","3" ,"4" ,"5", "6")
# Create binary variables for each season
for (Count in Counts) {
data[[Count]] <- ifelse(data$genre_count == Count, 1, 0)
}
# View the first few rows of the dataframe
head(data)
## # A tibble: 6 × 97
## movie year production_budget domestic_gross foreign_gross worldwide_gross
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Solo: A … 2018 275000000 213767512 179383835 393151347
## 2 The Lone… 2013 275000000 89302115 170700000 260002115
## 3 John Car… 2012 275000000 73058679 209719421 282778100
## 4 Battlesh… 2012 220000000 65233400 248244317 313477717
## 5 Robin Ho… 2010 210000000 105487148 216971858 322459006
## 6 Green La… 2011 200000000 116601172 102934320 219535492
## # ℹ 91 more variables: month <dbl>, Seasons <fct>, profit <dbl>,
## # profit_category <chr>, profit_margin <chr>, roi <dbl>, pct_foreign <dbl>,
## # match_key <chr>, popularity <dbl>, release_date <chr>,
## # original_language <chr>, vote_average <dbl>, vote_count <dbl>,
## # vote_ratio <dbl>, genre_list <chr>, genres <chr>, main_genres <fct>,
## # Action <dbl>, Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## # Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, …
Created the dummies variables for the for the Genre Count catagories So we can get the proper results when we drive more deep in the analysis part.
which so ever dummies we have made we took 1 of it catagories as a refereance when we will running the models.
doing log to Production budget
# Apply log transformation to Worldwide_Gross_Adj and create a new column
data$Log_production_budget_adj <- log(data$production_budget_adj)
# View the first few rows of the dataframe
head(data)
## # A tibble: 6 × 98
## movie year production_budget domestic_gross foreign_gross worldwide_gross
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Solo: A … 2018 275000000 213767512 179383835 393151347
## 2 The Lone… 2013 275000000 89302115 170700000 260002115
## 3 John Car… 2012 275000000 73058679 209719421 282778100
## 4 Battlesh… 2012 220000000 65233400 248244317 313477717
## 5 Robin Ho… 2010 210000000 105487148 216971858 322459006
## 6 Green La… 2011 200000000 116601172 102934320 219535492
## # ℹ 92 more variables: month <dbl>, Seasons <fct>, profit <dbl>,
## # profit_category <chr>, profit_margin <chr>, roi <dbl>, pct_foreign <dbl>,
## # match_key <chr>, popularity <dbl>, release_date <chr>,
## # original_language <chr>, vote_average <dbl>, vote_count <dbl>,
## # vote_ratio <dbl>, genre_list <chr>, genres <chr>, main_genres <fct>,
## # Action <dbl>, Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## # Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, …
We are converting the World wide Gross Adj in the Log World wide Gross adj so that the range become less.
Changing the names so it don’t give errors later
# Rename the column
colnames(data)[colnames(data) == "Main_Science Fiction"] <- "Main_Science_Fiction"
colnames(data)[colnames(data) == "PG-13"] <- "PG.13"
colnames(data)[colnames(data) == "90 to 135"] <- "between_90_to_135"
colnames(data)[colnames(data) == "Greater than 135"] <- "Greater_than_135"
# Replace "PG-13" with "PG.13" in the MPAA_Rating column
data$MPAA_Rating <- gsub("PG-13", "PG.13", data$MPAA_Rating)
data$MPAA_Rating <- gsub("NC-17", "NC.17", data$MPAA_Rating)
# Check the unique values in the column to confirm the change
unique(data$MPAA_Rating)
## [1] "PG.13" "PG" "R" "G" "NC.17"
data$main_genres <- gsub("Science Fiction", "Science_Fiction", data$main_genres)
unique(data$main_genres)
## [1] "Main_Action" "Main_Thriller" "Main_Adventure"
## [4] "Main_Fantasy" "Main_Drama" "Main_Science_Fiction"
## [7] "Main_Comedy" "Main_Horror" "Main_Animation"
## [10] "Main_Family" "Main_Crime" "Main_War"
## [13] "Main_History" "Main_Music" "Main_Documentary"
## [16] "Main_Mystery" "Main_Romance" "Main_Western"
data$Runtime_category <- gsub("90 to 135", "90.to.135", data$Runtime_category)
data$Runtime_category <- gsub("Greater than 135", "Greater.than.135", data$Runtime_category)
data$Runtime_category <- gsub("Less than 90", "Less.than.90", data$Runtime_category)
unique(data$Runtime_category)
## [1] "90.to.135" "Greater.than.135" "Less.than.90"
colnames(data)[colnames(data) == "Science Fiction"] <- "Science_Fiction"
# Check if the column has been renamed
colnames(data)
## [1] "movie" "year"
## [3] "production_budget" "domestic_gross"
## [5] "foreign_gross" "worldwide_gross"
## [7] "month" "Seasons"
## [9] "profit" "profit_category"
## [11] "profit_margin" "roi"
## [13] "pct_foreign" "match_key"
## [15] "popularity" "release_date"
## [17] "original_language" "vote_average"
## [19] "vote_count" "vote_ratio"
## [21] "genre_list" "genres"
## [23] "main_genres" "Action"
## [25] "Adventure" "Animation"
## [27] "Comedy" "Crime"
## [29] "Documentary" "Drama"
## [31] "Family" "Fantasy"
## [33] "History" "Horror"
## [35] "Music" "Mystery"
## [37] "Romance" "Science_Fiction"
## [39] "Thriller" "War"
## [41] "Western" "Production_Company"
## [43] "IMDb_Rating" "Distributor"
## [45] "Director" "Cast"
## [47] "Producer" "Screenwriter"
## [49] "MPAA_Rating" "Runtime"
## [51] "Critic_score" "cpi"
## [53] "production_budget_adj" "domestic_gross_adj"
## [55] "foreign_gross_adj" "worldwide_gross_adj"
## [57] "profit_adj" "roi_adj"
## [59] "profit_adj_margin" "genre_count"
## [61] "Runtime_category" "Main_Action"
## [63] "Main_Adventure" "Main_Animation"
## [65] "Main_Comedy" "Main_Crime"
## [67] "Main_Documentary" "Main_Drama"
## [69] "Main_Family" "Main_Fantasy"
## [71] "Main_History" "Main_Horror"
## [73] "Main_Music" "Main_Mystery"
## [75] "Main_Romance" "Main_Science_Fiction"
## [77] "Main_Thriller" "Main_War"
## [79] "Main_Western" "Other_Genres"
## [81] "budget_category" "Log_worldwide_gross_adj"
## [83] "between_90_to_135" "Greater_than_135"
## [85] "Spring" "Summer"
## [87] "Fall" "PG.13"
## [89] "R" "PG"
## [91] "G" "1"
## [93] "2" "3"
## [95] "4" "5"
## [97] "6" "Log_production_budget_adj"
Log-Transformed Distributions: Worldwide Gross and Production Budget
hist(data$Log_worldwide_gross_adj, main = "worldwide gross adj", breaks = 50)
hist(log(data$Log_production_budget_adj + 1), main = "Log-Transformed Budget", breaks = 50)
The histogram for the log-transformed worldwide gross (Log_worldwide_gross_adj) shows a right-skewed distribution that becomes more normalized after applying the log transformation. The data clusters between the values of 14 and 18, suggesting that most movies have an adjusted gross value concentrated within this range. This transformation helps reduce the impact of extreme outliers in the gross values, enabling a better understanding of the central tendencies and variance within the dataset.
For the log-transformed production budget (Log_production_budget_adj), the histogram also indicates a right-skewed distribution that becomes more symmetrical after transformation. The values predominantly range between 2.7 and 3.0, reflecting that most movies have production budgets within a smaller, more consistent range post-logarithmic scaling. This log transformation mitigates the high variance typically associated with production budgets, making it easier to compare across movies.
worldwide catagory
# Categorize Log_worldwide_gross_adj into buckets
data <- data %>%
mutate(Log_Worldwide_Gross_Category = case_when(
Log_worldwide_gross_adj <= 17.19 ~ "Low's",
Log_worldwide_gross_adj > 17.19 & Log_worldwide_gross_adj <= 18.46 ~ "Medium",
Log_worldwide_gross_adj > 18.46 ~ "High's"
))
# Convert to factor
data$Log_Worldwide_Gross_Category <- as.factor(data$Log_Worldwide_Gross_Category)
# Check if the transformation is correct
table(data$Log_Worldwide_Gross_Category)
##
## High's Low's Medium
## 394 395 395
These categories are based on the quantile-based thresholds defined as follows:
By converting the categorized variable into a factor, it ensures that the categories are treated as distinct, non-numeric labels in any subsequent analysis. The categorization results in nearly equal distribution across the three groups, which is useful for comparative analyses across different gross levels.
Multinomial Logistic Regression with Cross-Validation of whole data
# Load required libraries
library(caret)
## Warning: package 'caret' was built under R version 4.3.2
## Loading required package: lattice
# Prepare the dataset
set.seed(123) # For reproducibility
data <- as.data.frame(data)
# Define the control method for cross-validation
cv_control <- trainControl(
method = "cv", # Cross-validation method
number = 5, # Number of folds
verboseIter = TRUE, # Display progress
savePredictions = TRUE # Save predictions for analysis
)
# Load required library
library(caret)
library(nnet)
# Define cross-validation control
cv_control <- trainControl(method = "cv", number = 10) # 10-fold cross-validation
# Train a multinomial logistic regression model
cv_model <- train(
Log_Worldwide_Gross_Category ~ Log_production_budget_adj + PG.13 + R + PG + G + between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count + Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime + Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror + Main_Mystery + Main_Romance + Main_Science_Fiction + Main_Thriller,
data = data, # Dataset
method = "multinom", # Multinomial logistic regression
trControl = cv_control # Cross-validation control
)
## # weights: 81 (52 variable)
## initial value 1168.923475
## iter 10 value 990.595506
## iter 20 value 865.409943
## iter 30 value 836.250687
## iter 40 value 827.050334
## iter 50 value 826.280091
## iter 60 value 826.136525
## final value 826.134017
## converged
## # weights: 81 (52 variable)
## initial value 1168.923475
## iter 10 value 997.655452
## iter 20 value 924.236974
## iter 30 value 916.048289
## iter 40 value 915.836630
## final value 915.834463
## converged
## # weights: 81 (52 variable)
## initial value 1168.923475
## iter 10 value 990.603056
## iter 20 value 865.508858
## iter 30 value 836.436249
## iter 40 value 827.307871
## iter 50 value 826.551587
## iter 60 value 826.442214
## final value 826.441395
## converged
## # weights: 81 (52 variable)
## initial value 1168.923475
## iter 10 value 983.955118
## iter 20 value 869.200988
## iter 30 value 838.300509
## iter 40 value 832.404507
## iter 50 value 831.232362
## iter 60 value 831.061220
## final value 831.059683
## converged
## # weights: 81 (52 variable)
## initial value 1168.923475
## iter 10 value 992.236018
## iter 20 value 922.946508
## iter 30 value 915.271488
## iter 40 value 914.969517
## final value 914.966578
## converged
## # weights: 81 (52 variable)
## initial value 1168.923475
## iter 10 value 983.964016
## iter 20 value 869.302155
## iter 30 value 838.468071
## iter 40 value 832.614392
## iter 50 value 831.480797
## iter 60 value 831.335441
## final value 831.333251
## converged
## # weights: 81 (52 variable)
## initial value 1171.120700
## iter 10 value 982.516075
## iter 20 value 863.480514
## iter 30 value 828.438313
## iter 40 value 821.458346
## iter 50 value 818.118478
## iter 60 value 817.905595
## final value 817.903700
## converged
## # weights: 81 (52 variable)
## initial value 1171.120700
## iter 10 value 991.971384
## iter 20 value 918.029818
## iter 30 value 910.415086
## iter 40 value 910.251032
## iter 40 value 910.251026
## iter 40 value 910.251026
## final value 910.251026
## converged
## # weights: 81 (52 variable)
## initial value 1171.120700
## iter 10 value 982.526229
## iter 20 value 863.586290
## iter 30 value 828.623542
## iter 40 value 821.700656
## iter 50 value 818.405318
## iter 60 value 818.241964
## iter 70 value 818.235454
## final value 818.234914
## converged
## # weights: 81 (52 variable)
## initial value 1172.219312
## iter 10 value 985.240681
## iter 20 value 886.637121
## iter 30 value 851.831980
## iter 40 value 843.243837
## iter 50 value 842.614085
## iter 60 value 842.499891
## final value 842.497309
## converged
## # weights: 81 (52 variable)
## initial value 1172.219312
## iter 10 value 993.608153
## iter 20 value 935.114138
## iter 30 value 924.018424
## iter 40 value 923.812523
## final value 923.808274
## converged
## # weights: 81 (52 variable)
## initial value 1172.219312
## iter 10 value 985.249689
## iter 20 value 886.720714
## iter 30 value 851.994244
## iter 40 value 843.466456
## iter 50 value 842.855811
## iter 60 value 842.776989
## final value 842.773090
## converged
## # weights: 81 (52 variable)
## initial value 1172.219312
## iter 10 value 985.395430
## iter 20 value 875.835531
## iter 30 value 835.027372
## iter 40 value 826.638501
## iter 50 value 825.882214
## iter 60 value 825.670889
## iter 70 value 825.668305
## iter 70 value 825.668300
## iter 70 value 825.668300
## final value 825.668300
## converged
## # weights: 81 (52 variable)
## initial value 1172.219312
## iter 10 value 993.258511
## iter 20 value 926.342264
## iter 30 value 915.752675
## iter 40 value 915.541075
## final value 915.538111
## converged
## # weights: 81 (52 variable)
## initial value 1172.219312
## iter 10 value 985.403874
## iter 20 value 875.929795
## iter 30 value 835.218393
## iter 40 value 826.898327
## iter 50 value 826.159455
## iter 60 value 825.999527
## final value 825.994860
## converged
## # weights: 81 (52 variable)
## initial value 1171.120700
## iter 10 value 993.927219
## iter 20 value 887.351427
## iter 30 value 858.297717
## iter 40 value 850.912507
## iter 50 value 850.293543
## iter 60 value 850.166990
## final value 850.166034
## converged
## # weights: 81 (52 variable)
## initial value 1171.120700
## iter 10 value 1000.561888
## iter 20 value 937.208742
## iter 30 value 928.964041
## iter 40 value 928.767831
## final value 928.762134
## converged
## # weights: 81 (52 variable)
## initial value 1171.120700
## iter 10 value 993.934294
## iter 20 value 887.445791
## iter 30 value 858.457932
## iter 40 value 851.124361
## iter 50 value 850.523963
## iter 60 value 850.430698
## final value 850.430261
## converged
## # weights: 81 (52 variable)
## initial value 1170.022087
## iter 10 value 977.988497
## iter 20 value 870.717450
## iter 30 value 833.277343
## iter 40 value 825.207127
## iter 50 value 824.500245
## iter 60 value 824.396372
## final value 824.394643
## converged
## # weights: 81 (52 variable)
## initial value 1170.022087
## iter 10 value 986.189054
## iter 20 value 919.454502
## iter 30 value 910.382375
## iter 40 value 910.221738
## final value 910.219392
## converged
## # weights: 81 (52 variable)
## initial value 1170.022087
## iter 10 value 977.997297
## iter 20 value 870.795948
## iter 30 value 833.456326
## iter 40 value 825.444541
## iter 50 value 824.756099
## iter 60 value 824.683704
## final value 824.682467
## converged
## # weights: 81 (52 variable)
## initial value 1170.022087
## iter 10 value 984.020599
## iter 20 value 870.212115
## iter 30 value 839.413658
## iter 40 value 830.939967
## iter 50 value 830.245282
## iter 60 value 830.108396
## final value 830.106525
## converged
## # weights: 81 (52 variable)
## initial value 1170.022087
## iter 10 value 991.631916
## iter 20 value 926.035454
## iter 30 value 916.472420
## iter 40 value 916.251685
## final value 916.246646
## converged
## # weights: 81 (52 variable)
## initial value 1170.022087
## iter 10 value 984.028719
## iter 20 value 870.307638
## iter 30 value 839.590233
## iter 40 value 831.178181
## iter 50 value 830.501950
## iter 60 value 830.399026
## final value 830.398356
## converged
## # weights: 81 (52 variable)
## initial value 1171.120700
## iter 10 value 978.007042
## iter 20 value 881.715118
## iter 30 value 839.572736
## iter 40 value 831.307508
## iter 50 value 830.558397
## iter 60 value 830.429768
## final value 830.427644
## converged
## # weights: 81 (52 variable)
## initial value 1171.120700
## iter 10 value 986.957990
## iter 20 value 924.830437
## iter 30 value 916.176136
## iter 40 value 915.999626
## final value 915.996273
## converged
## # weights: 81 (52 variable)
## initial value 1171.120700
## iter 10 value 978.016593
## iter 20 value 881.796486
## iter 30 value 839.744252
## iter 40 value 831.533956
## iter 50 value 830.807341
## iter 60 value 830.712747
## final value 830.712306
## converged
## # weights: 81 (52 variable)
## initial value 1171.120700
## iter 10 value 994.182148
## iter 20 value 890.746755
## iter 30 value 849.367936
## iter 40 value 839.654161
## iter 50 value 839.004303
## iter 60 value 838.869090
## final value 838.866806
## converged
## # weights: 81 (52 variable)
## initial value 1171.120700
## iter 10 value 1001.068932
## iter 20 value 933.536824
## iter 30 value 925.017021
## iter 40 value 924.848206
## final value 924.845957
## converged
## # weights: 81 (52 variable)
## initial value 1171.120700
## iter 10 value 994.189508
## iter 20 value 890.832889
## iter 30 value 849.550745
## iter 40 value 839.892618
## iter 50 value 839.260620
## iter 60 value 839.164056
## final value 839.161381
## converged
## # weights: 81 (52 variable)
## initial value 1300.756950
## iter 10 value 1091.651296
## iter 20 value 970.787455
## iter 30 value 937.277836
## iter 40 value 928.208085
## iter 50 value 927.291351
## iter 60 value 927.167785
## final value 927.166727
## converged
# View model results
print(cv_model)
## Penalized Multinomial Regression
##
## 1184 samples
## 25 predictor
## 3 classes: 'High's', 'Low's', 'Medium'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 1064, 1064, 1066, 1067, 1067, 1066, ...
## Resampling results across tuning parameters:
##
## decay Accuracy Kappa
## 0e+00 0.6175184 0.4264002
## 1e-04 0.6175184 0.4264002
## 1e-01 0.5836751 0.3756410
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was decay = 1e-04.
# Check cross-validation metrics
cv_results <- cv_model$results
print(cv_results)
## decay Accuracy Kappa AccuracySD KappaSD
## 1 0e+00 0.6175184 0.4264002 0.05269619 0.07907112
## 2 1e-04 0.6175184 0.4264002 0.05269619 0.07907112
## 3 1e-01 0.5836751 0.3756410 0.03719189 0.05591128
# Check variable importance (if available)
if ("varImp" %in% methods("train")) {
var_imp <- varImp(cv_model, scale = FALSE)
print(var_imp)
plot(var_imp)
}
The multinomial logistic regression model was trained to predict Log_Worldwide_Gross_Category using predictors such as the log-transformed production budget, runtime categories, seasonal releases, MPAA ratings, genre counts, and specific genres. Evaluated with 10-fold cross-validation, the model achieved its highest accuracy of 61.75% with a decay value of 0.0001, accompanied by a Kappa statistic of 0.426, indicating moderate agreement between predictions and true values. Increased regularization (decay = 0.1) reduced accuracy to 58.37% and Kappa to 0.375, demonstrating that higher penalization adversely affects performance. The optimal model, selected at a decay of 0.0001, balances accuracy and complexity effectively. These results highlight the moderate predictive power of the model, suggesting potential for improvement by incorporating additional features or exploring alternative approaches.
Spliting the data
# Load necessary libraries for modeling and evaluation
library(caret)
library(glmnet) # For Ridge and LASSO regression
## Warning: package 'glmnet' was built under R version 4.3.3
## Loading required package: Matrix
## Warning: package 'Matrix' was built under R version 4.3.1
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Loaded glmnet 4.1-8
library(randomForest) # For Random Forest model
## Warning: package 'randomForest' was built under R version 4.3.2
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
## The following object is masked from 'package:dplyr':
##
## combine
library(xgboost) # For Gradient Boosting model
## Warning: package 'xgboost' was built under R version 4.3.3
##
## Attaching package: 'xgboost'
## The following object is masked from 'package:plotly':
##
## slice
## The following object is masked from 'package:dplyr':
##
## slice
library(Metrics) # For evaluation metrics
## Warning: package 'Metrics' was built under R version 4.3.3
##
## Attaching package: 'Metrics'
## The following objects are masked from 'package:caret':
##
## precision, recall
set.seed(123) # For reproducibility
# Split the data
train_indices <- sample(1:nrow(data), size = 0.70 * nrow(data))
train_data <- data[train_indices, ]
test_data <- data[-train_indices, ]
Multinomial Logistic Regession
# Load the required library
library(nnet)
# Fit the multinomial logistic regression model
multinom_model <- multinom(
Log_Worldwide_Gross_Category ~ Log_production_budget_adj + PG.13 + R + PG + G +
between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count +
Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime +
Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror +
Main_Mystery + Main_Romance + Main_Science_Fiction + Main_Thriller + Main_History,
data = train_data
)
## # weights: 84 (54 variable)
## initial value 909.650975
## iter 10 value 750.668060
## iter 20 value 679.033321
## iter 30 value 655.419161
## iter 40 value 647.812756
## iter 50 value 646.629405
## iter 60 value 646.572253
## iter 70 value 646.549992
## final value 646.549600
## converged
# View model summary
summary(multinom_model)
## Warning in sqrt(diag(vc)): NaNs produced
## Call:
## multinom(formula = Log_Worldwide_Gross_Category ~ Log_production_budget_adj +
## PG.13 + R + PG + G + between_90_to_135 + Greater_than_135 +
## Spring + Summer + Fall + genre_count + Main_Action + Main_Adventure +
## Main_Animation + Main_Comedy + Main_Crime + Main_Documentary +
## Main_Drama + Main_Family + Main_Fantasy + Main_Horror + Main_Mystery +
## Main_Romance + Main_Science_Fiction + Main_Thriller + Main_History,
## data = train_data)
##
## Coefficients:
## (Intercept) Log_production_budget_adj PG.13 R PG
## Low's 54.02857 -2.463531 -10.329502 -9.621212 -10.805911
## Medium 18.34683 -1.387259 6.766761 7.029339 6.214177
## G between_90_to_135 Greater_than_135 Spring Summer
## Low's -60.990149 0.3013782 -0.09367208 0.2540701 -0.2641861
## Medium 7.020628 -0.1343854 -0.26478885 -0.1769784 -0.1774896
## Fall genre_count Main_Action Main_Adventure Main_Animation
## Low's -0.1130515 0.1934268 -2.913081 -1.912802 -2.165656
## Medium -0.1741066 0.2124052 -1.412170 -1.120253 -1.399071
## Main_Comedy Main_Crime Main_Documentary Main_Drama Main_Family
## Low's -2.764042 -2.379240 -2.011616 -2.376101 -2.853453
## Medium -1.132786 -0.948553 -1.020207 -1.181713 -0.500758
## Main_Fantasy Main_Horror Main_Mystery Main_Romance Main_Science_Fiction
## Low's -3.443417 -5.360688 -4.408292 -1.725017 -3.5826158
## Medium -1.512193 -2.384847 -1.206547 0.182188 -0.7490445
## Main_Thriller Main_History
## Low's -3.178588 -1.509629
## Medium -1.541316 -37.452230
##
## Std. Errors:
## (Intercept) Log_production_budget_adj PG.13 R PG
## Low's 2.666312 0.1877811 0.9094408 0.9018218 0.9492838
## Medium 2.500942 0.1600283 0.6805335 0.6723222 0.6934545
## G between_90_to_135 Greater_than_135 Spring Summer
## Low's NaN 0.4597054 0.7195260 0.3515061 0.3614150
## Medium 1.099029 0.3781664 0.5465227 0.2879162 0.2830429
## Fall genre_count Main_Action Main_Adventure Main_Animation
## Low's 0.3332468 0.1337842 1.412635 1.511721 1.703619
## Medium 0.2712032 0.1094383 1.349323 1.397321 1.492783
## Main_Comedy Main_Crime Main_Documentary Main_Drama Main_Family
## Low's 1.417273 1.469423 2.354495 1.407818 1.930024
## Medium 1.359308 1.401000 2.142253 1.354289 1.590654
## Main_Fantasy Main_Horror Main_Mystery Main_Romance Main_Science_Fiction
## Low's 1.903964 1.498765 1.951964 1.631140 1.738463
## Medium 1.516519 1.408728 1.568679 1.542214 1.518612
## Main_Thriller Main_History
## Low's 1.460582 1.992374e+00
## Medium 1.395043 2.262602e-14
##
## Residual Deviance: 1293.099
## AIC: 1401.099
The multinomial logistic regression was applied to classify the Log_Worldwide_Gross_Category into “High’s,” “Medium,” and “Low’s” based on predictors such as Log_production_budget_adj, runtime categories, season categories, MPAA ratings, and main genres. The model converged after 70 iterations with a residual deviance of 1293.099 and an AIC of 1401.099, indicating the model’s fit.
Key insights from the coefficients include:
Production Budget: Higher Log_production_budget_adj negatively impacts the probability of being classified in “Low’s” and “Medium” categories compared to the reference “High’s.” Genre Influence: Specific genres like Main_Horror and Main_Fantasy show strong negative associations with “Low’s” and “Medium” categories, suggesting these genres are less likely to result in low revenue. Runtime and Season Effects: The categories like “between 90 to 135 minutes” and Summer season do not show significant effects, as the coefficients are close to zero with high standard errors, indicating potential variability in their impact. MPAA Ratings: Ratings such as PG-13 and PG have strong associations, with PG-13 reducing the likelihood of belonging to the “Low’s” category. The standard errors for some coefficients are notably high, particularly for rare categories such as Main_History and Main_Documentary, which could indicate instability due to smaller representation in the dataset. Overall, the model provides reasonable predictions but may require additional refinement or regularization for improved generalizability.
Evaluation of Multinomial Logistic Regression Model
library(pROC)
## Warning: package 'pROC' was built under R version 4.3.2
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following object is masked from 'package:Metrics':
##
## auc
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
# Predict on the test data
test_data$predicted_categories <- predict(multinom_model, newdata = test_data, type = "class")
# Confusion Matrix
confusion_matrix_test <- confusionMatrix(
data = factor(test_data$predicted_categories, levels = levels(test_data$Log_Worldwide_Gross_Category)),
reference = factor(test_data$Log_Worldwide_Gross_Category)
)
print(confusion_matrix_test)
## Confusion Matrix and Statistics
##
## Reference
## Prediction High's Low's Medium
## High's 84 6 35
## Low's 10 81 29
## Medium 29 27 55
##
## Overall Statistics
##
## Accuracy : 0.618
## 95% CI : (0.5653, 0.6687)
## No Information Rate : 0.3455
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.4268
##
## Mcnemar's Test P-Value : 0.6517
##
## Statistics by Class:
##
## Class: High's Class: Low's Class: Medium
## Sensitivity 0.6829 0.7105 0.4622
## Specificity 0.8240 0.8388 0.7637
## Pos Pred Value 0.6720 0.6750 0.4955
## Neg Pred Value 0.8312 0.8602 0.7388
## Prevalence 0.3455 0.3202 0.3343
## Detection Rate 0.2360 0.2275 0.1545
## Detection Prevalence 0.3511 0.3371 0.3118
## Balanced Accuracy 0.7535 0.7747 0.6129
# ROC Curve and AUC for each class
roc_list_test <- list()
auc_list_test <- list()
categories <- levels(test_data$Log_Worldwide_Gross_Category)
for (category in categories) {
# Create binary response for "One-vs-All"
true_binary <- ifelse(test_data$Log_Worldwide_Gross_Category == category, 1, 0)
predicted_probs <- predict(multinom_model, newdata = test_data, type = "probs")[, category]
# ROC Curve
roc_obj_test <- roc(true_binary, predicted_probs)
roc_list_test[[category]] <- roc_obj_test
auc_list_test[[category]] <- auc(roc_obj_test)
# Plot ROC Curve for this class
plot(roc_obj_test, main = paste("ROC Curve for", category, "on Test Data"), col = "blue")
abline(a = 0, b = 1, col = "gray", lty = 2) # Reference line
cat("AUC for", category, "on Test Data:", auc_list_test[[category]], "\n")
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for High's on Test Data: 0.8448306
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Low's on Test Data: 0.8601747
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Medium on Test Data: 0.6920009
# Lift Chart for each category
for (category in categories) {
# Sort data by predicted probabilities for the chosen category
test_data <- test_data %>%
arrange(desc(predict(multinom_model, newdata = test_data, type = "probs")[, category]))
# Add deciles for the chosen category
test_data$decile <- ntile(predict(multinom_model, newdata = test_data, type = "probs")[, category], 10)
# Calculate Lift
lift_table_test <- test_data %>%
group_by(decile) %>%
summarize(
total = n(),
events = sum(Log_Worldwide_Gross_Category == category),
cumulative_events = cumsum(events),
cumulative_percentage = cumulative_events / sum(Log_Worldwide_Gross_Category == category)
)
# Plot Lift Chart for the current category
plot(
lift_table_test$decile, lift_table_test$cumulative_percentage,
type = "o", col = "blue", xlab = "Decile", ylab = "Cumulative Gain",
main = paste("Lift Chart for", category, "on Test Data")
)
abline(0, 0.1, col = "gray", lty = 2) # Reference random line
}
Interpretation of Multinomial Logistic Regression Model with Lift Charts and ROC Analysis The multinomial logistic regression model shows an overall accuracy of 61.8% with a Kappa statistic of 0.4268, which indicates moderate agreement between predicted and observed categories. The model’s performance varies by class, with the sensitivity for “High’s” at 68.29%, “Low’s” at 71.05%, and “Medium” at 46.22%. The specificity values suggest the model can reasonably distinguish between classes, with “High’s” and “Low’s” achieving 82.4% and 83.88%, respectively. The positive predictive values (PPVs) indicate that predictions for “High’s” and “Low’s” are more reliable compared to “Medium,” which has lower sensitivity and PPV.
The ROC analysis reveals that the model performs best for “Low’s” with an AUC of 0.86, followed by “High’s” with 0.8448, and “Medium” lagging at 0.692. This suggests the model is more effective at classifying “High’s” and “Low’s” compared to “Medium,” where there is room for improvement. The lift charts for “High’s,” “Low’s,” and “Medium” classes indicate that the model’s cumulative gains are consistent with a random classifier, suggesting that its ability to rank predictions might not be significantly better than random.
While the model shows reasonable discrimination for “High’s” and “Low’s,” the “Medium” category presents a challenge due to its lower balanced accuracy and AUC. This discrepancy might stem from overlapping features between “Medium” and the other categories, requiring further feature engineering or exploration of alternative algorithms. Overall, the model is moderately effective but could benefit from refinement, especially in handling the “Medium” class predictions.
Random Forest
# Load the required library
library(randomForest)
# Train Random Forest Model for Classification
rf_model <- randomForest(
Log_Worldwide_Gross_Category ~ Log_production_budget_adj + PG.13 + R + PG + G +
between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count +
Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime +
Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror +
Main_Mystery + Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,
data = train_data,
ntree = 500, # Number of trees
mtry = 5, # Number of predictors randomly selected at each split
importance = TRUE, # Calculate variable importance
proximity = TRUE # Enable proximity matrix for better insights
)
# View the model summary
print(rf_model)
##
## Call:
## randomForest(formula = Log_Worldwide_Gross_Category ~ Log_production_budget_adj + PG.13 + R + PG + G + between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count + Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime + Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror + Main_Mystery + Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller, data = train_data, ntree = 500, mtry = 5, importance = TRUE, proximity = TRUE)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 5
##
## OOB estimate of error rate: 42.27%
## Confusion matrix:
## High's Low's Medium class.error
## High's 174 27 70 0.3579336
## Low's 25 189 67 0.3274021
## Medium 73 88 115 0.5833333
# Feature Importance
importance(rf_model)
## High's Low's Medium
## Log_production_budget_adj 54.23404066 58.3964024 16.6099620
## PG.13 7.23391587 5.8022759 0.1428070
## R 10.66811130 8.4856570 -0.6918324
## PG 9.58714900 3.8205036 -2.7065644
## G -1.95595955 4.3647583 0.6583091
## between_90_to_135 3.45872879 -3.9251462 1.0197417
## Greater_than_135 5.41925679 6.1085477 2.0768173
## Spring -2.22544249 3.2519120 -0.2645075
## Summer -0.40877224 2.4932487 -2.8785964
## Fall -1.86660551 2.9856875 2.8811094
## genre_count 8.28023430 3.6306634 -0.6670277
## Main_Action 13.74783553 10.7553413 -1.8980100
## Main_Adventure 4.98002373 3.9423308 -1.2316089
## Main_Animation 5.36253608 -0.1803412 -4.8961871
## Main_Comedy 6.71286257 2.1285906 -3.2960200
## Main_Crime 2.30217102 1.1251912 -1.2319112
## Main_Documentary 1.29123649 3.5155053 -2.3727848
## Main_Drama 7.28313497 6.0722752 -4.5410771
## Main_Family -4.68116187 -3.4223355 -1.6401665
## Main_Fantasy -1.79537739 -0.5137727 -2.3141812
## Main_Horror 3.55771593 -7.2675268 3.3127806
## Main_Mystery -3.21265432 3.1600207 2.5766902
## Main_History -0.01441111 1.6539941 4.8726468
## Main_Romance 8.32859461 1.1203048 -0.3211193
## Main_Science_Fiction 1.69248262 -4.2542209 2.8594548
## Main_Thriller 3.53457829 3.0926812 -4.4366351
## MeanDecreaseAccuracy MeanDecreaseGini
## Log_production_budget_adj 66.5823632 161.6311816
## PG.13 9.4199603 7.0313273
## R 12.5720032 11.9967875
## PG 8.3055515 6.5648208
## G 0.9334809 1.2709370
## between_90_to_135 0.3034235 9.3321449
## Greater_than_135 7.8985043 4.8645679
## Spring 0.7873428 9.3040851
## Summer -0.4483334 9.3873777
## Fall 3.1305884 9.3192971
## genre_count 6.5357738 28.0454828
## Main_Action 14.6850374 10.5656676
## Main_Adventure 4.8134016 4.0267356
## Main_Animation 2.2254825 1.8529323
## Main_Comedy 2.9902262 7.0837292
## Main_Crime 1.0450720 4.5569799
## Main_Documentary 1.9491483 1.4695295
## Main_Drama 6.1428315 9.3212568
## Main_Family -5.6971242 0.9853271
## Main_Fantasy -2.8793950 2.0103055
## Main_Horror -0.9534465 6.5876137
## Main_Mystery 1.8732446 2.5944616
## Main_History 2.7409357 2.1725445
## Main_Romance 4.8958916 4.8982248
## Main_Science_Fiction 0.2847141 2.6531996
## Main_Thriller 1.0071114 5.4797721
varImpPlot(rf_model) # Plot variable importance
The Random Forest model for classifying the “Log_Worldwide_Gross_Category” demonstrates an overall out-of-bag (OOB) error rate of 42.27%, indicating that the model struggles to classify certain instances accurately. The class-specific errors reveal that “High’s” and “Low’s” are classified with relatively lower error rates of 35.79% and 32.74%, respectively, whereas the “Medium” category has a much higher error rate of 58.33%. This disparity highlights the model’s difficulty in distinguishing “Medium” instances, likely due to feature overlap with other categories.
The variable importance plot reveals that “Log_production_budget_adj” is the most significant predictor, contributing the most to both accuracy and the Gini impurity reduction. Other notable predictors include “genre_count,” “Main_Action,” “Greater_than_135,” and “Main_Romance.” These variables are likely strong indicators of the differences between the gross categories. Conversely, features such as “Main_Fantasy,” “Main_Family,” and “Main_Animation” contribute less, suggesting they may be less relevant for this classification task.
Despite its ability to leverage multiple predictors and provide insights into feature importance, the Random Forest model exhibits moderate classification performance.
Evaluation of the Random Forest Model
# Predict on the test data
test_data$rf_predicted_categories <- predict(rf_model, newdata = test_data)
# Confusion Matrix
rf_confusion_matrix_test <- confusionMatrix(
data = factor(test_data$rf_predicted_categories, levels = levels(test_data$Log_Worldwide_Gross_Category)),
reference = factor(test_data$Log_Worldwide_Gross_Category)
)
print(rf_confusion_matrix_test)
## Confusion Matrix and Statistics
##
## Reference
## Prediction High's Low's Medium
## High's 85 10 34
## Low's 12 77 36
## Medium 26 27 49
##
## Overall Statistics
##
## Accuracy : 0.5927
## 95% CI : (0.5397, 0.6442)
## No Information Rate : 0.3455
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.389
##
## Mcnemar's Test P-Value : 0.4691
##
## Statistics by Class:
##
## Class: High's Class: Low's Class: Medium
## Sensitivity 0.6911 0.6754 0.4118
## Specificity 0.8112 0.8017 0.7764
## Pos Pred Value 0.6589 0.6160 0.4804
## Neg Pred Value 0.8326 0.8398 0.7244
## Prevalence 0.3455 0.3202 0.3343
## Detection Rate 0.2388 0.2163 0.1376
## Detection Prevalence 0.3624 0.3511 0.2865
## Balanced Accuracy 0.7511 0.7385 0.5941
# ROC Curve and AUC for each class
rf_roc_list_test <- list()
rf_auc_list_test <- list()
for (category in categories) {
# Create binary response for "One-vs-All"
true_binary <- ifelse(test_data$Log_Worldwide_Gross_Category == category, 1, 0)
rf_predicted_probs <- predict(rf_model, newdata = test_data, type = "prob")[, category]
# ROC Curve
rf_roc_obj_test <- roc(true_binary, rf_predicted_probs)
rf_roc_list_test[[category]] <- rf_roc_obj_test
rf_auc_list_test[[category]] <- auc(rf_roc_obj_test)
# Plot ROC Curve for this class
plot(rf_roc_obj_test, main = paste("ROC Curve for", category, "on Test Data"), col = "red")
abline(a = 0, b = 1, col = "gray", lty = 2) # Reference line
cat("AUC for", category, "on Test Data:", rf_auc_list_test[[category]], "\n")
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for High's on Test Data: 0.8363341
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Low's on Test Data: 0.8235283
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Medium on Test Data: 0.6586888
# Lift Chart for each category
for (category in categories) {
# Sort data by predicted probabilities for the chosen category
test_data <- test_data %>%
arrange(desc(predict(rf_model, newdata = test_data, type = "prob")[, category]))
# Add deciles for the chosen category
test_data$rf_decile <- ntile(predict(rf_model, newdata = test_data, type = "prob")[, category], 10)
# Calculate Lift
rf_lift_table_test <- test_data %>%
group_by(rf_decile) %>%
summarize(
total = n(),
events = sum(Log_Worldwide_Gross_Category == category),
cumulative_events = cumsum(events),
cumulative_percentage = cumulative_events / sum(Log_Worldwide_Gross_Category == category)
)
# Plot Lift Chart for the current category
plot(
rf_lift_table_test$rf_decile, rf_lift_table_test$cumulative_percentage,
type = "o", col = "red", xlab = "Decile", ylab = "Cumulative Gain",
main = paste("Lift Chart for", category, "on Test Data")
)
abline(0, 0.1, col = "gray", lty = 2) # Reference random line
}
The random forest model for classifying “Log_Worldwide_Gross_Category” achieved an overall accuracy of 59.27% on the test data with a Kappa statistic of 0.389, indicating moderate agreement beyond chance. The AUC values for “High’s,” “Low’s,” and “Medium” categories were 0.836, 0.824, and 0.659, respectively. These AUC values suggest the model performs well for distinguishing “High’s” and “Low’s” but struggles with “Medium” predictions.
From the confusion matrix, the sensitivity (true positive rate) was highest for “High’s” (69.11%), followed by “Low’s” (67.54%) and “Medium” (41.18%). Specificity (true negative rate) was consistently higher across classes, indicating the model’s strength in identifying negative cases. The lift charts show relatively stable cumulative gains across deciles for all categories, which suggests modest differentiation capability for high-probability deciles. However, the random forest exhibited challenges in perfectly separating “Medium” due to overlapping class probabilities, as reflected in the lower sensitivity and AUC for this category.
The random forest model demonstrates reliable performance for the “High’s” and “Low’s” categories. The variable importance plot highlights “Log_production_budget_adj” and “genre_count” as the top predictors, emphasizing their significance in determining the gross revenue category.
XGBoost
# Load necessary libraries
library(xgboost)
# Prepare data for XGBoost
x_train <- model.matrix(
Log_Worldwide_Gross_Category ~ Log_production_budget_adj + PG.13 + R + PG + G +
between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count +
Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime +
Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror +
Main_Mystery + Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,
data = train_data
)[, -1] # Remove intercept column
# Encode the target variable as numeric (0-based for XGBoost)
y_train <- as.numeric(train_data$Log_Worldwide_Gross_Category) - 1
# Train the XGBoost model for multi-class classification
xgb_model <- xgboost(
data = as.matrix(x_train),
label = y_train,
objective = "multi:softprob", # Multi-class classification
num_class = length(levels(train_data$Log_Worldwide_Gross_Category)), # Number of classes
nrounds = 100, # Number of boosting rounds
max_depth = 6, # Tree depth
eta = 0.1, # Learning rate
colsample_bytree = 0.8, # Subsample ratio of columns
verbose = 1 # Print training progress
)
## [1] train-mlogloss:1.044219
## [2] train-mlogloss:1.014600
## [3] train-mlogloss:0.981189
## [4] train-mlogloss:0.951196
## [5] train-mlogloss:0.923517
## [6] train-mlogloss:0.889304
## [7] train-mlogloss:0.860176
## [8] train-mlogloss:0.842175
## [9] train-mlogloss:0.822921
## [10] train-mlogloss:0.800008
## [11] train-mlogloss:0.780153
## [12] train-mlogloss:0.766195
## [13] train-mlogloss:0.748585
## [14] train-mlogloss:0.735677
## [15] train-mlogloss:0.720366
## [16] train-mlogloss:0.710524
## [17] train-mlogloss:0.697816
## [18] train-mlogloss:0.688281
## [19] train-mlogloss:0.677437
## [20] train-mlogloss:0.667779
## [21] train-mlogloss:0.662752
## [22] train-mlogloss:0.656260
## [23] train-mlogloss:0.647030
## [24] train-mlogloss:0.638480
## [25] train-mlogloss:0.630084
## [26] train-mlogloss:0.622255
## [27] train-mlogloss:0.615014
## [28] train-mlogloss:0.608129
## [29] train-mlogloss:0.603235
## [30] train-mlogloss:0.596009
## [31] train-mlogloss:0.591830
## [32] train-mlogloss:0.585533
## [33] train-mlogloss:0.581276
## [34] train-mlogloss:0.577733
## [35] train-mlogloss:0.574517
## [36] train-mlogloss:0.571123
## [37] train-mlogloss:0.566720
## [38] train-mlogloss:0.563134
## [39] train-mlogloss:0.559574
## [40] train-mlogloss:0.555446
## [41] train-mlogloss:0.551674
## [42] train-mlogloss:0.547738
## [43] train-mlogloss:0.543440
## [44] train-mlogloss:0.539785
## [45] train-mlogloss:0.536030
## [46] train-mlogloss:0.533126
## [47] train-mlogloss:0.530018
## [48] train-mlogloss:0.528901
## [49] train-mlogloss:0.526127
## [50] train-mlogloss:0.523214
## [51] train-mlogloss:0.521795
## [52] train-mlogloss:0.519638
## [53] train-mlogloss:0.517398
## [54] train-mlogloss:0.515160
## [55] train-mlogloss:0.512116
## [56] train-mlogloss:0.510168
## [57] train-mlogloss:0.508645
## [58] train-mlogloss:0.506789
## [59] train-mlogloss:0.504495
## [60] train-mlogloss:0.502816
## [61] train-mlogloss:0.501197
## [62] train-mlogloss:0.499407
## [63] train-mlogloss:0.498239
## [64] train-mlogloss:0.496254
## [65] train-mlogloss:0.494156
## [66] train-mlogloss:0.492314
## [67] train-mlogloss:0.489519
## [68] train-mlogloss:0.487633
## [69] train-mlogloss:0.485190
## [70] train-mlogloss:0.483455
## [71] train-mlogloss:0.482276
## [72] train-mlogloss:0.480786
## [73] train-mlogloss:0.479804
## [74] train-mlogloss:0.478515
## [75] train-mlogloss:0.476099
## [76] train-mlogloss:0.474205
## [77] train-mlogloss:0.472692
## [78] train-mlogloss:0.471695
## [79] train-mlogloss:0.469626
## [80] train-mlogloss:0.468345
## [81] train-mlogloss:0.466787
## [82] train-mlogloss:0.465979
## [83] train-mlogloss:0.464638
## [84] train-mlogloss:0.462384
## [85] train-mlogloss:0.460966
## [86] train-mlogloss:0.459638
## [87] train-mlogloss:0.458490
## [88] train-mlogloss:0.457232
## [89] train-mlogloss:0.455864
## [90] train-mlogloss:0.454534
## [91] train-mlogloss:0.453183
## [92] train-mlogloss:0.452209
## [93] train-mlogloss:0.450001
## [94] train-mlogloss:0.449032
## [95] train-mlogloss:0.448240
## [96] train-mlogloss:0.446901
## [97] train-mlogloss:0.445530
## [98] train-mlogloss:0.443044
## [99] train-mlogloss:0.440377
## [100] train-mlogloss:0.438573
# Feature importance
importance <- xgb.importance(feature_names = colnames(x_train), model = xgb_model)
print(importance)
## Feature Gain Cover Frequency
## 1: Log_production_budget_adj 0.6571360958 0.540024484 0.444935869
## 2: genre_count 0.0597556999 0.076114894 0.102167183
## 3: Main_Horror 0.0284012203 0.035320571 0.031107180
## 4: Fall 0.0240605515 0.012550804 0.050567595
## 5: R 0.0228187565 0.021416049 0.025947221
## 6: Main_Comedy 0.0217120987 0.014437870 0.027126640
## 7: Summer 0.0210226708 0.018750205 0.039510541
## 8: PG.13 0.0207653218 0.013231485 0.036856848
## 9: Main_Action 0.0201254750 0.017748169 0.024620374
## 10: Main_Drama 0.0184599494 0.014212020 0.019607843
## 11: between_90_to_135 0.0181312740 0.012659661 0.033466018
## 12: Spring 0.0142384274 0.015689948 0.031991744
## 13: Main_Romance 0.0127818434 0.033868427 0.016659295
## 14: Main_Thriller 0.0118235010 0.013440066 0.018280997
## 15: Main_Crime 0.0112919867 0.016242604 0.017838714
## 16: PG 0.0079206403 0.007379108 0.014300457
## 17: Greater_than_135 0.0073522408 0.009674671 0.010025063
## 18: Main_History 0.0053744942 0.034047648 0.010025063
## 19: Main_Adventure 0.0053313633 0.013976867 0.011204482
## 20: Main_Science_Fiction 0.0035834036 0.029034035 0.007961079
## 21: Main_Mystery 0.0019436315 0.008069030 0.004570249
## 22: G 0.0017177299 0.013592940 0.005012531
## 23: Main_Fantasy 0.0013849323 0.006005137 0.006929087
## 24: Main_Family 0.0011820761 0.017149667 0.005749668
## 25: Main_Documentary 0.0009270498 0.002544168 0.001621701
## 26: Main_Animation 0.0007575660 0.002819470 0.001916556
## Feature Gain Cover Frequency
xgb.plot.importance(importance)
The XGBoost model for multi-class classification highlights the feature “Log_production_budget_adj” as the most critical predictor with the highest gain (65.7%), indicating its dominant role in distinguishing revenue categories. Other significant contributors include “genre_count” (5.9%), “Main_Horror” (2.8%), and “Fall” (2.4%). Gain represents the improvement in model accuracy attributed to a feature, while cover measures how often a feature is used in splits. Despite the dominance of “Log_production_budget_adj,” the contributions of genre-related and temporal features suggest revenue prediction is also influenced by qualitative content and seasonal timing. XGBoost’s flexible tree structure captures interactions effectively, offering a robust performance for multi-class categorization. This feature importance plot underscores the multi-dimensional nature of the revenue categories.
Evaluation of XGBoost Model
# Prepare test data matrix
x_test <- model.matrix(
Log_Worldwide_Gross_Category ~ Log_production_budget_adj + PG.13 + R + PG + G +
between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count +
Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime +
Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror +
Main_Mystery + Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,
data = test_data
)[, -1]
# Encode test labels as numeric (0-based)
y_test <- as.numeric(test_data$Log_Worldwide_Gross_Category) - 1
# Predict probabilities for test data
pred_probs <- predict(xgb_model, newdata = x_test)
# Reshape predicted probabilities into a matrix
pred_matrix <- matrix(pred_probs, nrow = nrow(x_test), byrow = TRUE)
# Get predicted classes (1-based indexing)
pred_classes <- max.col(pred_matrix) - 1 # Convert from 1-based to 0-based indexing for compatibility
# Convert to factor using the original levels of the dependent variable
test_data$predicted_categories <- factor(pred_classes, labels = levels(test_data$Log_Worldwide_Gross_Category))
cat("Rows in predicted categories:", length(test_data$predicted_categories), "\n")
## Rows in predicted categories: 356
cat("Rows in test data:", nrow(test_data), "\n")
## Rows in test data: 356
library(caret)
confusion_matrix <- confusionMatrix(
data = test_data$predicted_categories,
reference = test_data$Log_Worldwide_Gross_Category
)
print(confusion_matrix)
## Confusion Matrix and Statistics
##
## Reference
## Prediction High's Low's Medium
## High's 88 6 30
## Low's 6 72 26
## Medium 29 36 63
##
## Overall Statistics
##
## Accuracy : 0.6264
## 95% CI : (0.5739, 0.6768)
## No Information Rate : 0.3455
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.439
##
## Mcnemar's Test P-Value : 0.6526
##
## Statistics by Class:
##
## Class: High's Class: Low's Class: Medium
## Sensitivity 0.7154 0.6316 0.5294
## Specificity 0.8455 0.8678 0.7257
## Pos Pred Value 0.7097 0.6923 0.4922
## Neg Pred Value 0.8491 0.8333 0.7544
## Prevalence 0.3455 0.3202 0.3343
## Detection Rate 0.2472 0.2022 0.1770
## Detection Prevalence 0.3483 0.2921 0.3596
## Balanced Accuracy 0.7805 0.7497 0.6276
# ROC Curve and AUC for each class
library(pROC)
# Initialize lists to store results
roc_list <- list()
auc_list <- list()
# Iterate through categories
for (category in categories) {
# Create binary response for "One-vs-All"
true_binary <- ifelse(test_data$Log_Worldwide_Gross_Category == category, 1, 0)
# Extract predicted probabilities for the current category
predicted_probs <- pred_matrix[, which(categories == category)]
# ROC Curve
roc_obj <- roc(true_binary, predicted_probs)
roc_list[[category]] <- roc_obj
auc_list[[category]] <- auc(roc_obj)
# Plot ROC Curve
plot(roc_obj, main = paste("ROC Curve for", category), col = "blue")
abline(a = 0, b = 1, col = "gray", lty = 2) # Reference line
cat("AUC for", category, ":", auc_list[[category]], "\n")
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for High's : 0.8632018
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Low's : 0.8365412
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Medium : 0.6993937
# Lift Chart for each category
library(dplyr)
for (category in categories) {
# Sort data by predicted probabilities for the chosen category
test_data <- test_data %>%
arrange(desc(pred_matrix[, which(categories == category)]))
# Add deciles for the chosen category
test_data$decile <- ntile(pred_matrix[, which(categories == category)], 10)
# Calculate Lift
lift_table <- test_data %>%
group_by(decile) %>%
summarize(
total = n(),
events = sum(Log_Worldwide_Gross_Category == category),
cumulative_events = cumsum(events),
cumulative_percentage = cumulative_events / sum(Log_Worldwide_Gross_Category == category)
)
# Plot Lift Chart
plot(
lift_table$decile, lift_table$cumulative_percentage,
type = "o", col = "blue", xlab = "Decile", ylab = "Cumulative Gain",
main = paste("Lift Chart for", category)
)
abline(0, 0.1, col = "gray", lty = 2) # Reference line
}
The XGBoost model’s performance for predicting the Log_Worldwide_Gross_Category on the test data is moderately successful, with an overall accuracy of 62.64% and a Kappa statistic of 0.439, reflecting moderate agreement between predicted and actual categories. Here’s a breakdown of its key performance indicators and observations:
Overall Performance:
The model correctly classified 62.64% of the test instances, exceeding the no-information rate of 34.55%. The Kappa value of 0.439 indicates a reasonable level of agreement between predictions and actual categories after accounting for random chance.
Class-Specific Metrics:
“High’s” Class: Sensitivity: 71.54% – the model correctly identified 71.54% of the true “High’s”. Specificity: 84.55% – it correctly rejected 84.55% of non-“High’s”. AUC: 0.863 – excellent performance in distinguishing “High’s” from other categories.
“Low’s” Class: Sensitivity: 63.16% – the model identified 63.16% of the true “Low’s”. Specificity: 86.78% – strong at rejecting non-“Low’s”. AUC: 0.837 – good discriminative power for this class.
“Medium” Class: Sensitivity: 52.94% – weakest performance in identifying “Medium”. Specificity: 72.57% – moderate ability to correctly reject non-“Medium”. AUC: 0.699 – fair performance but less effective than for the other two classes.
Feature Importance:
The most influential feature is Log_production_budget_adj, contributing 65.71% gain, indicating that production budgets are a significant determinant of a movie’s gross category. genre_count ranks second with a gain of 5.98%, showing that the number of genres in a movie also impacts its gross category. Other notable features include Main_Horror, Fall, and R, though their contributions are considerably smaller. Confusion Matrix Insights:
The model performed best on the “High’s” category, correctly classifying 88 out of 123 instances. Misclassifications are most frequent in the “Medium” category, where 65 instances were incorrectly labeled as “High’s” or “Low’s”.
Lift Charts:
The lift charts for all categories show that the model’s predictions do not significantly exceed random prediction in higher deciles. This indicates that while the model performs moderately well, it has limited power in prioritizing instances with high probabilities of being correctly classified.
polynonial
# Fit a polynomial logistic regression model
library(nnet)
polynomial_logistic_model <- multinom(
Log_Worldwide_Gross_Category ~ poly(Log_production_budget_adj, degree = 2) +
PG.13 + R + PG + G + between_90_to_135 + Greater_than_135 +
Spring + Summer + Fall + genre_count + Main_Action + Main_Adventure +
Main_Animation + Main_Comedy + Main_Crime + Main_Documentary +
Main_Drama + Main_Family + Main_Fantasy + Main_Horror + Main_Mystery + Main_History +
Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,
data = train_data,
maxit = 1000 # Increase maximum iterations if convergence is slow
)
## # weights: 87 (56 variable)
## initial value 909.650975
## iter 10 value 798.718851
## iter 20 value 708.026565
## iter 30 value 652.817268
## iter 40 value 637.902038
## iter 50 value 634.820394
## iter 60 value 634.569618
## iter 70 value 634.561714
## final value 634.555551
## converged
# View model summary
summary(polynomial_logistic_model)
## Call:
## multinom(formula = Log_Worldwide_Gross_Category ~ poly(Log_production_budget_adj,
## degree = 2) + PG.13 + R + PG + G + between_90_to_135 + Greater_than_135 +
## Spring + Summer + Fall + genre_count + Main_Action + Main_Adventure +
## Main_Animation + Main_Comedy + Main_Crime + Main_Documentary +
## Main_Drama + Main_Family + Main_Fantasy + Main_Horror + Main_Mystery +
## Main_History + Main_History + Main_Romance + Main_Science_Fiction +
## Main_Thriller, data = train_data, maxit = 1000)
##
## Coefficients:
## (Intercept) poly(Log_production_budget_adj, degree = 2)1
## Low's 7.057289 -75.27698
## Medium -1.896867 -31.67634
## poly(Log_production_budget_adj, degree = 2)2 PG.13 R
## Low's -29.28044 -5.154952 -4.476388
## Medium -26.87965 3.349820 3.573899
## PG G between_90_to_135 Greater_than_135 Spring
## Low's -5.630926 -12.086795 0.2828976 -0.02843191 0.2644157
## Medium 2.777515 3.595399 -0.1413354 -0.19822236 -0.1570733
## Summer Fall genre_count Main_Action Main_Adventure
## Low's -0.2203033 -0.1295543 0.2275012 -3.132174 -1.861444
## Medium -0.1345413 -0.1799153 0.2441743 -1.583681 -1.012582
## Main_Animation Main_Comedy Main_Crime Main_Documentary Main_Drama
## Low's -2.309754 -3.061335 -2.637638 -2.0951053 -2.681637
## Medium -1.498046 -1.359556 -1.155372 -0.9881132 -1.430312
## Main_Family Main_Fantasy Main_Horror Main_Mystery Main_History
## Low's -3.1743580 -3.478602 -5.301919 -4.726698 -1.668444
## Medium -0.7303407 -1.500829 -2.329913 -1.441521 -10.235846
## Main_Romance Main_Science_Fiction Main_Thriller
## Low's -2.066832 -3.5918702 -3.479934
## Medium -0.108060 -0.6376244 -1.762021
##
## Std. Errors:
## (Intercept) poly(Log_production_budget_adj, degree = 2)1
## Low's 31.440511 6.542746
## Medium 7.968938 5.940044
## poly(Log_production_budget_adj, degree = 2)2 PG.13 R
## Low's 4.779280 31.402599 31.401985
## Medium 5.373912 7.892837 7.893843
## PG G between_90_to_135 Greater_than_135 Spring
## Low's 31.404980 41.126023 0.4501671 0.7233987 0.3589477
## Medium 7.893714 7.937537 0.3784103 0.5563157 0.2959257
## Summer Fall genre_count Main_Action Main_Adventure
## Low's 0.3680146 0.3394367 0.1364965 1.508249 1.612416
## Medium 0.2919421 0.2773746 0.1122283 1.416612 1.466495
## Main_Animation Main_Comedy Main_Crime Main_Documentary Main_Drama
## Low's 1.806828 1.513753 1.564187 2.400066 1.504440
## Medium 1.568365 1.426653 1.466844 2.216188 1.421635
## Main_Family Main_Fantasy Main_Horror Main_Mystery Main_History
## Low's 2.016163 1.988465 1.577897 2.025014 2.159766
## Medium 1.662983 1.588028 1.468938 1.632632 63.086958
## Main_Romance Main_Science_Fiction Main_Thriller
## Low's 1.708711 1.839572 1.556529
## Medium 1.599106 1.608725 1.463149
##
## Residual Deviance: 1269.111
## AIC: 1381.111
The polynomial logistic regression model successfully introduced a second-degree term for the Log_production_budget_adj variable to account for potential nonlinear relationships between predictors and the target variable.
Model Fit The model converged after 70 iterations, achieving a residual deviance of 1269.111 and an AIC of 1381.111. These values indicate a reasonable fit, with reduced deviance compared to linear models, suggesting that the inclusion of the polynomial term captures additional variance.
Key Predictors
Log_production_budget_adj (Polynomial Term):
The first-degree polynomial term is significantly negative for “Low’s” (-75.28) and “Medium” (-31.68), indicating that increasing production budget reduces the likelihood of these categories.
The second-degree term is also negative but smaller in magnitude, confirming a diminishing nonlinear effect of the budget on the likelihood of belonging to “Low’s” and “Medium.”
Categorical Variables:
The MPAA rating categories (PG.13, R, PG, G) have consistent impacts across the outcome categories. For instance, the coefficients for PG.13 show a strong negative impact on “Low’s” (-5.15) while being moderately positive for “Medium” (3.35). Genre-based variables like Main_Comedy, Main_Horror, and Main_Action show notable negative effects on “Low’s” and smaller effects for “Medium.”
Seasonal Variables:
Features like Spring and Fall have small coefficients, indicating limited impact on classification outcomes.
Evaluation of Polynomial Logistic Regression
# Predict categories on the test data
test_data$predicted_categories <- predict(polynomial_logistic_model, newdata = test_data)
# Predict probabilities for each class
predicted_probabilities <- predict(polynomial_logistic_model, newdata = test_data, type = "probs")
# Confusion Matrix
library(caret)
confusion_matrix <- confusionMatrix(
data = factor(test_data$predicted_categories, levels = levels(test_data$Log_Worldwide_Gross_Category)),
reference = test_data$Log_Worldwide_Gross_Category
)
print(confusion_matrix)
## Confusion Matrix and Statistics
##
## Reference
## Prediction High's Low's Medium
## High's 79 4 29
## Low's 10 80 29
## Medium 34 30 61
##
## Overall Statistics
##
## Accuracy : 0.618
## 95% CI : (0.5653, 0.6687)
## No Information Rate : 0.3455
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.4272
##
## Mcnemar's Test P-Value : 0.3939
##
## Statistics by Class:
##
## Class: High's Class: Low's Class: Medium
## Sensitivity 0.6423 0.7018 0.5126
## Specificity 0.8584 0.8388 0.7300
## Pos Pred Value 0.7054 0.6723 0.4880
## Neg Pred Value 0.8197 0.8565 0.7489
## Prevalence 0.3455 0.3202 0.3343
## Detection Rate 0.2219 0.2247 0.1713
## Detection Prevalence 0.3146 0.3343 0.3511
## Balanced Accuracy 0.7503 0.7703 0.6213
library(pROC)
# ROC Curve and AUC for each class
roc_list <- list()
auc_list <- list()
categories <- levels(test_data$Log_Worldwide_Gross_Category)
for (category in categories) {
# Create binary response for "One-vs-All"
true_binary <- ifelse(test_data$Log_Worldwide_Gross_Category == category, 1, 0)
predicted_probs <- predicted_probabilities[, category]
# ROC Curve
roc_obj <- roc(true_binary, predicted_probs)
roc_list[[category]] <- roc_obj
auc_list[[category]] <- auc(roc_obj)
# Plot ROC Curve
plot(roc_obj, main = paste("ROC Curve for", category), col = "blue")
abline(a = 0, b = 1, col = "gray", lty = 2)
cat("AUC for", category, ":", auc_list[[category]], "\n")
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for High's : 0.8523326
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Low's : 0.8615884
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Medium : 0.7074957
# Lift Chart for each category
library(dplyr)
for (category in categories) {
# Sort data by predicted probabilities for the chosen category
test_data <- test_data %>%
arrange(desc(predicted_probabilities[, which(categories == category)]))
# Add deciles for the chosen category
test_data$decile <- ntile(predicted_probabilities[, which(categories == category)], 10)
# Calculate Lift
lift_table <- test_data %>%
group_by(decile) %>%
summarize(
total = n(),
events = sum(Log_Worldwide_Gross_Category == category),
cumulative_events = cumsum(events),
cumulative_percentage = cumulative_events / sum(Log_Worldwide_Gross_Category == category)
)
# Plot Lift Chart
plot(
lift_table$decile, lift_table$cumulative_percentage,
type = "o", col = "blue", xlab = "Decile", ylab = "Cumulative Gain",
main = paste("Lift Chart for", category)
)
abline(0, 0.1, col = "gray", lty = 2) # Reference line
}
The polynomial logistic regression model was evaluated on test data, and its performance metrics were captured through a confusion matrix, ROC curves, and a lift chart for the “Good” category.
Confusion Matrix and Classification Metrics
Overall Accuracy: The model achieved an accuracy of 45.51%, which is close to the No Information Rate (NIR) of 45.79%, indicating that the model struggles to outperform random guessing for the majority class.
Kappa Statistic: The kappa value of 0.1212 suggests weak agreement between the predicted and actual classes beyond chance. McNemar’s Test: The p-value of 0.02813 indicates significant differences in classification errors for paired observations, suggesting the model’s misclassification patterns are not random.
Performance by Class:
Excellent:
Sensitivity: 33.78%, indicating the model identifies only about a third of actual “Excellent” instances correctly. Specificity: 85.11%, suggesting the model is good at identifying instances not belonging to this category. AUC: 0.6812, which reflects moderate discriminative ability for the “Excellent” class.
Good:
Sensitivity: 57.06%, showing the model is better at identifying “Good” instances compared to the other classes. Specificity: 44.56%, highlighting challenges in distinguishing “Good” from other classes. AUC: 0.5363, close to random guessing, showing weak predictive ability for “Good.”
Poor:
Sensitivity: 36.97%, indicating limited ability to correctly classify “Poor” instances. Specificity: 81.01%, meaning the model can exclude instances not belonging to “Poor” relatively well. AUC: 0.6851, slightly better but still moderate performance.
Decision tree
# Load required libraries
library(rpart)
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 4.3.2
# Train Decision Tree for Classification
dt_model_categorical <- rpart(
Log_Worldwide_Gross_Category ~ Log_production_budget_adj + PG.13 + R + PG + G +
between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count +
Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime +
Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror +
Main_Mystery + Main_Romance + Main_Science_Fiction + Main_Thriller,
data = train_data,
method = "class", # Classification tree
control = rpart.control(
cp = 0.003, # Smaller complexity parameter for more splits
maxdepth = 10, # Allow deeper trees
minsplit = 10 # Minimum observations required to split
)
)
# Plot the decision tree
rpart.plot(
dt_model_categorical,
type = 3, # Show splits and probabilities
extra = 101, # Display n, % observations, and class probabilities
under = TRUE, # Show text under the nodes
fallen.leaves = TRUE, # Spread the leaves horizontally
box.palette = "Blues" # Color scheme for the boxes
)
# Print a summary of the model
summary(dt_model_categorical)
## Call:
## rpart(formula = Log_Worldwide_Gross_Category ~ Log_production_budget_adj +
## PG.13 + R + PG + G + between_90_to_135 + Greater_than_135 +
## Spring + Summer + Fall + genre_count + Main_Action + Main_Adventure +
## Main_Animation + Main_Comedy + Main_Crime + Main_Documentary +
## Main_Drama + Main_Family + Main_Fantasy + Main_Horror + Main_Mystery +
## Main_Romance + Main_Science_Fiction + Main_Thriller, data = train_data,
## method = "class", control = rpart.control(cp = 0.003, maxdepth = 10,
## minsplit = 10))
## n= 828
##
## CP nsplit rel error xerror xstd
## 1 0.323583181 0 1.0000000 1.0530165 0.02420522
## 2 0.109689214 1 0.6764168 0.6800731 0.02616685
## 3 0.011578306 2 0.5667276 0.6581353 0.02607784
## 4 0.006398537 5 0.5319927 0.5850091 0.02561559
## 5 0.005484461 7 0.5191956 0.5813528 0.02558563
## 6 0.003656307 8 0.5137112 0.5868373 0.02563032
## 7 0.003046923 19 0.4716636 0.6087751 0.02579421
## 8 0.003000000 23 0.4570384 0.6270567 0.02591287
##
## Variable importance
## Log_production_budget_adj genre_count Main_Action
## 69 7 5
## Main_Adventure PG Main_Drama
## 4 3 2
## Fall Main_Comedy Main_Horror
## 2 2 2
## Main_Animation R PG.13
## 1 1 1
##
## Node number 1: 828 observations, complexity param=0.3235832
## predicted class=Low's expected loss=0.660628 P(node) =1
## class counts: 271 281 276
## probabilities: 0.327 0.339 0.333
## left son=2 (282 obs) right son=3 (546 obs)
## Primary splits:
## Log_production_budget_adj < 17.47917 to the right, improve=93.390790, (0 missing)
## R < 0.5 to the left, improve=17.560580, (0 missing)
## Main_Action < 0.5 to the right, improve=11.879580, (0 missing)
## Main_Drama < 0.5 to the left, improve=10.839130, (0 missing)
## PG < 0.5 to the right, improve= 8.994799, (0 missing)
## Surrogate splits:
## Main_Action < 0.5 to the right, agree=0.693, adj=0.099, (0 split)
## Main_Adventure < 0.5 to the right, agree=0.690, adj=0.089, (0 split)
## genre_count < 3.5 to the right, agree=0.680, adj=0.060, (0 split)
## PG < 0.5 to the right, agree=0.679, adj=0.057, (0 split)
## Main_Animation < 0.5 to the right, agree=0.670, adj=0.032, (0 split)
##
## Node number 2: 282 observations, complexity param=0.003656307
## predicted class=High's expected loss=0.3049645 P(node) =0.3405797
## class counts: 196 19 67
## probabilities: 0.695 0.067 0.238
## left son=4 (110 obs) right son=5 (172 obs)
## Primary splits:
## Log_production_budget_adj < 18.19916 to the right, improve=12.389060, (0 missing)
## genre_count < 5.5 to the left, improve= 4.337058, (0 missing)
## R < 0.5 to the left, improve= 2.627660, (0 missing)
## Main_Romance < 0.5 to the left, improve= 2.005515, (0 missing)
## PG < 0.5 to the right, improve= 1.586866, (0 missing)
## Surrogate splits:
## Main_Adventure < 0.5 to the right, agree=0.660, adj=0.127, (0 split)
## PG < 0.5 to the right, agree=0.635, adj=0.064, (0 split)
## genre_count < 3.5 to the right, agree=0.631, adj=0.055, (0 split)
## G < 0.5 to the right, agree=0.621, adj=0.027, (0 split)
## Main_Fantasy < 0.5 to the right, agree=0.621, adj=0.027, (0 split)
##
## Node number 3: 546 observations, complexity param=0.1096892
## predicted class=Low's expected loss=0.5201465 P(node) =0.6594203
## class counts: 75 262 209
## probabilities: 0.137 0.480 0.383
## left son=6 (188 obs) right son=7 (358 obs)
## Primary splits:
## Log_production_budget_adj < 16.0469 to the left, improve=39.860070, (0 missing)
## R < 0.5 to the right, improve= 5.528132, (0 missing)
## Main_Drama < 0.5 to the right, improve= 4.601632, (0 missing)
## PG.13 < 0.5 to the left, improve= 2.926376, (0 missing)
## Main_Action < 0.5 to the left, improve= 1.834008, (0 missing)
## Surrogate splits:
## Main_Horror < 0.5 to the right, agree=0.672, adj=0.048, (0 split)
## Main_Documentary < 0.5 to the right, agree=0.659, adj=0.011, (0 split)
## Main_Fantasy < 0.5 to the right, agree=0.658, adj=0.005, (0 split)
##
## Node number 4: 110 observations
## predicted class=High's expected loss=0.1 P(node) =0.1328502
## class counts: 99 2 9
## probabilities: 0.900 0.018 0.082
##
## Node number 5: 172 observations, complexity param=0.003656307
## predicted class=High's expected loss=0.4360465 P(node) =0.2077295
## class counts: 97 17 58
## probabilities: 0.564 0.099 0.337
## left son=10 (168 obs) right son=11 (4 obs)
## Primary splits:
## genre_count < 5.5 to the left, improve=3.1414730, (0 missing)
## Log_production_budget_adj < 17.90181 to the right, improve=2.1174260, (0 missing)
## Main_Romance < 0.5 to the left, improve=1.0904750, (0 missing)
## R < 0.5 to the left, improve=0.8355518, (0 missing)
## PG < 0.5 to the right, improve=0.6807246, (0 missing)
##
## Node number 6: 188 observations
## predicted class=Low's expected loss=0.2234043 P(node) =0.2270531
## class counts: 9 146 33
## probabilities: 0.048 0.777 0.176
##
## Node number 7: 358 observations, complexity param=0.01157831
## predicted class=Medium expected loss=0.5083799 P(node) =0.4323671
## class counts: 66 116 176
## probabilities: 0.184 0.324 0.492
## left son=14 (186 obs) right son=15 (172 obs)
## Primary splits:
## Log_production_budget_adj < 16.89341 to the left, improve=8.885337, (0 missing)
## R < 0.5 to the right, improve=2.435754, (0 missing)
## Main_Horror < 0.5 to the right, improve=1.459841, (0 missing)
## Main_Drama < 0.5 to the right, improve=1.358856, (0 missing)
## PG.13 < 0.5 to the left, improve=1.156377, (0 missing)
## Surrogate splits:
## Main_Action < 0.5 to the left, agree=0.601, adj=0.169, (0 split)
## genre_count < 2.5 to the left, agree=0.550, adj=0.064, (0 split)
## Main_Drama < 0.5 to the right, agree=0.550, adj=0.064, (0 split)
## Greater_than_135 < 0.5 to the left, agree=0.539, adj=0.041, (0 split)
## Spring < 0.5 to the left, agree=0.528, adj=0.017, (0 split)
##
## Node number 10: 168 observations, complexity param=0.003656307
## predicted class=High's expected loss=0.422619 P(node) =0.2028986
## class counts: 97 17 54
## probabilities: 0.577 0.101 0.321
## left son=20 (57 obs) right son=21 (111 obs)
## Primary splits:
## Log_production_budget_adj < 17.90181 to the right, improve=2.0337840, (0 missing)
## PG < 0.5 to the right, improve=1.2824840, (0 missing)
## R < 0.5 to the left, improve=1.0190320, (0 missing)
## genre_count < 2.5 to the left, improve=0.7738095, (0 missing)
## Main_Science_Fiction < 0.5 to the left, improve=0.5772727, (0 missing)
## Surrogate splits:
## Main_Adventure < 0.5 to the right, agree=0.679, adj=0.053, (0 split)
## genre_count < 3.5 to the right, agree=0.673, adj=0.035, (0 split)
## Main_Mystery < 0.5 to the right, agree=0.667, adj=0.018, (0 split)
##
## Node number 11: 4 observations
## predicted class=Medium expected loss=0 P(node) =0.004830918
## class counts: 0 0 4
## probabilities: 0.000 0.000 1.000
##
## Node number 14: 186 observations, complexity param=0.01157831
## predicted class=Low's expected loss=0.5537634 P(node) =0.2246377
## class counts: 26 83 77
## probabilities: 0.140 0.446 0.414
## left son=28 (14 obs) right son=29 (172 obs)
## Primary splits:
## Main_Horror < 0.5 to the right, improve=2.220716, (0 missing)
## R < 0.5 to the right, improve=2.141954, (0 missing)
## Main_Romance < 0.5 to the left, improve=1.807414, (0 missing)
## Main_Mystery < 0.5 to the left, improve=1.713908, (0 missing)
## Main_Action < 0.5 to the right, improve=1.073431, (0 missing)
##
## Node number 15: 172 observations
## predicted class=Medium expected loss=0.4244186 P(node) =0.2077295
## class counts: 40 33 99
## probabilities: 0.233 0.192 0.576
##
## Node number 20: 57 observations, complexity param=0.003656307
## predicted class=High's expected loss=0.2982456 P(node) =0.06884058
## class counts: 40 3 14
## probabilities: 0.702 0.053 0.246
## left son=40 (45 obs) right son=41 (12 obs)
## Primary splits:
## Main_Comedy < 0.5 to the left, improve=3.011111, (0 missing)
## Spring < 0.5 to the right, improve=1.843972, (0 missing)
## between_90_to_135 < 0.5 to the left, improve=1.428195, (0 missing)
## PG < 0.5 to the right, improve=1.428195, (0 missing)
## genre_count < 1.5 to the right, improve=1.202564, (0 missing)
## Surrogate splits:
## genre_count < 1.5 to the right, agree=0.877, adj=0.417, (0 split)
##
## Node number 21: 111 observations, complexity param=0.003656307
## predicted class=High's expected loss=0.4864865 P(node) =0.134058
## class counts: 57 14 40
## probabilities: 0.514 0.126 0.360
## left son=42 (97 obs) right son=43 (14 obs)
## Primary splits:
## genre_count < 3.5 to the left, improve=2.2726720, (0 missing)
## Main_Adventure < 0.5 to the left, improve=1.2347350, (0 missing)
## between_90_to_135 < 0.5 to the right, improve=1.0828830, (0 missing)
## Main_Comedy < 0.5 to the right, improve=1.0193910, (0 missing)
## Log_production_budget_adj < 17.55139 to the left, improve=0.7932705, (0 missing)
##
## Node number 28: 14 observations, complexity param=0.005484461
## predicted class=Medium expected loss=0.5 P(node) =0.01690821
## class counts: 5 2 7
## probabilities: 0.357 0.143 0.500
## left son=56 (7 obs) right son=57 (7 obs)
## Primary splits:
## Log_production_budget_adj < 16.34233 to the right, improve=2.7142860, (0 missing)
## PG.13 < 0.5 to the left, improve=0.6785714, (0 missing)
## R < 0.5 to the right, improve=0.6785714, (0 missing)
## between_90_to_135 < 0.5 to the left, improve=0.6785714, (0 missing)
## Summer < 0.5 to the right, improve=0.5119048, (0 missing)
## Surrogate splits:
## between_90_to_135 < 0.5 to the left, agree=0.643, adj=0.286, (0 split)
## Fall < 0.5 to the left, agree=0.571, adj=0.143, (0 split)
##
## Node number 29: 172 observations, complexity param=0.01157831
## predicted class=Low's expected loss=0.5290698 P(node) =0.2077295
## class counts: 21 81 70
## probabilities: 0.122 0.471 0.407
## left son=58 (87 obs) right son=59 (85 obs)
## Primary splits:
## R < 0.5 to the right, improve=2.339310, (0 missing)
## Main_Romance < 0.5 to the left, improve=1.968793, (0 missing)
## Main_Mystery < 0.5 to the left, improve=1.796408, (0 missing)
## Log_production_budget_adj < 16.69612 to the left, improve=1.234713, (0 missing)
## between_90_to_135 < 0.5 to the left, improve=1.141605, (0 missing)
## Surrogate splits:
## PG.13 < 0.5 to the left, agree=0.890, adj=0.776, (0 split)
## PG < 0.5 to the left, agree=0.599, adj=0.188, (0 split)
## Log_production_budget_adj < 16.64124 to the left, agree=0.581, adj=0.153, (0 split)
## Spring < 0.5 to the left, agree=0.564, adj=0.118, (0 split)
## Fall < 0.5 to the right, agree=0.558, adj=0.106, (0 split)
##
## Node number 40: 45 observations
## predicted class=High's expected loss=0.2222222 P(node) =0.05434783
## class counts: 35 3 7
## probabilities: 0.778 0.067 0.156
##
## Node number 41: 12 observations, complexity param=0.003656307
## predicted class=Medium expected loss=0.4166667 P(node) =0.01449275
## class counts: 5 0 7
## probabilities: 0.417 0.000 0.583
## left son=82 (8 obs) right son=83 (4 obs)
## Primary splits:
## Log_production_budget_adj < 18.138 to the left, improve=2.083333000, (0 missing)
## Summer < 0.5 to the left, improve=2.083333000, (0 missing)
## PG.13 < 0.5 to the left, improve=0.166666700, (0 missing)
## genre_count < 2.5 to the left, improve=0.055555560, (0 missing)
## R < 0.5 to the left, improve=0.004761905, (0 missing)
##
## Node number 42: 97 observations, complexity param=0.003046923
## predicted class=High's expected loss=0.443299 P(node) =0.1171498
## class counts: 54 11 32
## probabilities: 0.557 0.113 0.330
## left son=84 (20 obs) right son=85 (77 obs)
## Primary splits:
## Main_Comedy < 0.5 to the right, improve=0.9470076, (0 missing)
## Log_production_budget_adj < 17.56539 to the left, improve=0.7965206, (0 missing)
## genre_count < 2.5 to the left, improve=0.6964138, (0 missing)
## between_90_to_135 < 0.5 to the right, improve=0.6771579, (0 missing)
## Main_Drama < 0.5 to the right, improve=0.5467995, (0 missing)
## Surrogate splits:
## genre_count < 1.5 to the left, agree=0.814, adj=0.1, (0 split)
##
## Node number 43: 14 observations
## predicted class=Medium expected loss=0.4285714 P(node) =0.01690821
## class counts: 3 3 8
## probabilities: 0.214 0.214 0.571
##
## Node number 56: 7 observations
## predicted class=High's expected loss=0.4285714 P(node) =0.008454106
## class counts: 4 2 1
## probabilities: 0.571 0.286 0.143
##
## Node number 57: 7 observations
## predicted class=Medium expected loss=0.1428571 P(node) =0.008454106
## class counts: 1 0 6
## probabilities: 0.143 0.000 0.857
##
## Node number 58: 87 observations, complexity param=0.003656307
## predicted class=Low's expected loss=0.4367816 P(node) =0.1050725
## class counts: 8 49 30
## probabilities: 0.092 0.563 0.345
## left son=116 (51 obs) right son=117 (36 obs)
## Primary splits:
## Fall < 0.5 to the left, improve=2.4264140, (0 missing)
## Spring < 0.5 to the right, improve=1.5136200, (0 missing)
## Summer < 0.5 to the right, improve=1.0899550, (0 missing)
## Log_production_budget_adj < 16.40027 to the left, improve=0.5339603, (0 missing)
## genre_count < 2.5 to the right, improve=0.5277214, (0 missing)
## Surrogate splits:
## Log_production_budget_adj < 16.52246 to the left, agree=0.644, adj=0.139, (0 split)
## Summer < 0.5 to the right, agree=0.621, adj=0.083, (0 split)
## Main_Drama < 0.5 to the left, agree=0.609, adj=0.056, (0 split)
## Main_Adventure < 0.5 to the left, agree=0.598, adj=0.028, (0 split)
##
## Node number 59: 85 observations, complexity param=0.006398537
## predicted class=Medium expected loss=0.5294118 P(node) =0.102657
## class counts: 13 32 40
## probabilities: 0.153 0.376 0.471
## left son=118 (56 obs) right son=119 (29 obs)
## Primary splits:
## Log_production_budget_adj < 16.69612 to the left, improve=1.4084180, (0 missing)
## Main_Mystery < 0.5 to the left, improve=1.3850790, (0 missing)
## Main_Romance < 0.5 to the left, improve=0.9621641, (0 missing)
## Main_Drama < 0.5 to the right, improve=0.8483837, (0 missing)
## genre_count < 1.5 to the left, improve=0.8176471, (0 missing)
## Surrogate splits:
## PG < 0.5 to the left, agree=0.682, adj=0.069, (0 split)
## Main_Adventure < 0.5 to the left, agree=0.682, adj=0.069, (0 split)
## between_90_to_135 < 0.5 to the right, agree=0.671, adj=0.034, (0 split)
##
## Node number 82: 8 observations
## predicted class=High's expected loss=0.375 P(node) =0.009661836
## class counts: 5 0 3
## probabilities: 0.625 0.000 0.375
##
## Node number 83: 4 observations
## predicted class=Medium expected loss=0 P(node) =0.004830918
## class counts: 0 0 4
## probabilities: 0.000 0.000 1.000
##
## Node number 84: 20 observations
## predicted class=High's expected loss=0.3 P(node) =0.02415459
## class counts: 14 2 4
## probabilities: 0.700 0.100 0.200
##
## Node number 85: 77 observations, complexity param=0.003046923
## predicted class=High's expected loss=0.4805195 P(node) =0.09299517
## class counts: 40 9 28
## probabilities: 0.519 0.117 0.364
## left son=170 (24 obs) right son=171 (53 obs)
## Primary splits:
## Log_production_budget_adj < 17.74953 to the right, improve=1.1332390, (0 missing)
## Main_Drama < 0.5 to the right, improve=0.7355762, (0 missing)
## Main_Animation < 0.5 to the left, improve=0.4374634, (0 missing)
## Main_Science_Fiction < 0.5 to the left, improve=0.4374634, (0 missing)
## between_90_to_135 < 0.5 to the right, improve=0.4306027, (0 missing)
## Surrogate splits:
## Main_Thriller < 0.5 to the right, agree=0.727, adj=0.125, (0 split)
## Main_Animation < 0.5 to the right, agree=0.701, adj=0.042, (0 split)
## Main_Science_Fiction < 0.5 to the right, agree=0.701, adj=0.042, (0 split)
##
## Node number 116: 51 observations
## predicted class=Low's expected loss=0.3529412 P(node) =0.0615942
## class counts: 6 33 12
## probabilities: 0.118 0.647 0.235
##
## Node number 117: 36 observations, complexity param=0.003656307
## predicted class=Medium expected loss=0.5 P(node) =0.04347826
## class counts: 2 16 18
## probabilities: 0.056 0.444 0.500
## left son=234 (3 obs) right son=235 (33 obs)
## Primary splits:
## Log_production_budget_adj < 16.13019 to the left, improve=1.2323230, (0 missing)
## Main_Drama < 0.5 to the right, improve=0.8527778, (0 missing)
## genre_count < 1.5 to the left, improve=0.5777778, (0 missing)
## Main_Thriller < 0.5 to the right, improve=0.3402778, (0 missing)
## Main_Action < 0.5 to the right, improve=0.2626263, (0 missing)
##
## Node number 118: 56 observations, complexity param=0.006398537
## predicted class=Low's expected loss=0.5535714 P(node) =0.06763285
## class counts: 8 25 23
## probabilities: 0.143 0.446 0.411
## left son=236 (24 obs) right son=237 (32 obs)
## Primary splits:
## Main_Drama < 0.5 to the right, improve=1.6458330, (0 missing)
## genre_count < 3.5 to the right, improve=1.2884620, (0 missing)
## Log_production_budget_adj < 16.17715 to the right, improve=1.0702020, (0 missing)
## Main_Romance < 0.5 to the left, improve=0.9544025, (0 missing)
## between_90_to_135 < 0.5 to the left, improve=0.8034591, (0 missing)
## Surrogate splits:
## Main_Comedy < 0.5 to the left, agree=0.714, adj=0.333, (0 split)
## genre_count < 2.5 to the left, agree=0.661, adj=0.208, (0 split)
## Log_production_budget_adj < 16.59911 to the right, agree=0.589, adj=0.042, (0 split)
##
## Node number 119: 29 observations
## predicted class=Medium expected loss=0.4137931 P(node) =0.03502415
## class counts: 5 7 17
## probabilities: 0.172 0.241 0.586
##
## Node number 170: 24 observations
## predicted class=High's expected loss=0.3333333 P(node) =0.02898551
## class counts: 16 1 7
## probabilities: 0.667 0.042 0.292
##
## Node number 171: 53 observations, complexity param=0.003046923
## predicted class=High's expected loss=0.5471698 P(node) =0.06400966
## class counts: 24 8 21
## probabilities: 0.453 0.151 0.396
## left son=342 (20 obs) right son=343 (33 obs)
## Primary splits:
## Log_production_budget_adj < 17.55589 to the left, improve=2.0189250, (0 missing)
## Main_Drama < 0.5 to the right, improve=1.9834840, (0 missing)
## Main_Horror < 0.5 to the right, improve=1.5237740, (0 missing)
## between_90_to_135 < 0.5 to the right, improve=0.8871069, (0 missing)
## genre_count < 1.5 to the left, improve=0.5731613, (0 missing)
## Surrogate splits:
## Main_Romance < 0.5 to the right, agree=0.66, adj=0.1, (0 split)
##
## Node number 234: 3 observations
## predicted class=Low's expected loss=0.3333333 P(node) =0.003623188
## class counts: 1 2 0
## probabilities: 0.333 0.667 0.000
##
## Node number 235: 33 observations, complexity param=0.003656307
## predicted class=Medium expected loss=0.4545455 P(node) =0.03985507
## class counts: 1 14 18
## probabilities: 0.030 0.424 0.545
## left son=470 (15 obs) right son=471 (18 obs)
## Primary splits:
## Main_Drama < 0.5 to the right, improve=0.9454545, (0 missing)
## Log_production_budget_adj < 16.54416 to the right, improve=0.7736597, (0 missing)
## genre_count < 1.5 to the left, improve=0.7676768, (0 missing)
## Main_Comedy < 0.5 to the left, improve=0.4319014, (0 missing)
## Main_Action < 0.5 to the right, improve=0.3454545, (0 missing)
## Surrogate splits:
## Log_production_budget_adj < 16.62341 to the right, agree=0.667, adj=0.267, (0 split)
## Main_Comedy < 0.5 to the left, agree=0.667, adj=0.267, (0 split)
## Greater_than_135 < 0.5 to the right, agree=0.606, adj=0.133, (0 split)
## genre_count < 1.5 to the left, agree=0.606, adj=0.133, (0 split)
## between_90_to_135 < 0.5 to the left, agree=0.576, adj=0.067, (0 split)
##
## Node number 236: 24 observations, complexity param=0.003656307
## predicted class=Low's expected loss=0.4583333 P(node) =0.02898551
## class counts: 5 13 6
## probabilities: 0.208 0.542 0.250
## left son=472 (18 obs) right son=473 (6 obs)
## Primary splits:
## Fall < 0.5 to the left, improve=1.7500000, (0 missing)
## Log_production_budget_adj < 16.1696 to the left, improve=1.0833330, (0 missing)
## Summer < 0.5 to the left, improve=1.0833330, (0 missing)
## Spring < 0.5 to the right, improve=0.7191877, (0 missing)
## genre_count < 1.5 to the left, improve=0.2916667, (0 missing)
##
## Node number 237: 32 observations, complexity param=0.003656307
## predicted class=Medium expected loss=0.46875 P(node) =0.03864734
## class counts: 3 12 17
## probabilities: 0.094 0.375 0.531
## left son=474 (3 obs) right son=475 (29 obs)
## Primary splits:
## genre_count < 3.5 to the right, improve=1.4058910, (0 missing)
## Log_production_budget_adj < 16.17023 to the right, improve=1.0592950, (0 missing)
## Main_Comedy < 0.5 to the left, improve=0.8125000, (0 missing)
## Main_Romance < 0.5 to the right, improve=0.7162356, (0 missing)
## Main_Thriller < 0.5 to the right, improve=0.4403736, (0 missing)
##
## Node number 342: 20 observations
## predicted class=High's expected loss=0.35 P(node) =0.02415459
## class counts: 13 2 5
## probabilities: 0.650 0.100 0.250
##
## Node number 343: 33 observations, complexity param=0.003046923
## predicted class=Medium expected loss=0.5151515 P(node) =0.03985507
## class counts: 11 6 16
## probabilities: 0.333 0.182 0.485
## left son=686 (13 obs) right son=687 (20 obs)
## Primary splits:
## Main_Drama < 0.5 to the right, improve=2.0771560, (0 missing)
## Log_production_budget_adj < 17.57828 to the right, improve=0.8035298, (0 missing)
## Summer < 0.5 to the left, improve=0.8035298, (0 missing)
## genre_count < 2.5 to the left, improve=0.7127897, (0 missing)
## Main_Action < 0.5 to the left, improve=0.5757576, (0 missing)
## Surrogate splits:
## genre_count < 2.5 to the left, agree=0.727, adj=0.308, (0 split)
## Main_Action < 0.5 to the left, agree=0.727, adj=0.308, (0 split)
## PG.13 < 0.5 to the right, agree=0.697, adj=0.231, (0 split)
## Greater_than_135 < 0.5 to the right, agree=0.697, adj=0.231, (0 split)
## Fall < 0.5 to the right, agree=0.697, adj=0.231, (0 split)
##
## Node number 470: 15 observations
## predicted class=Low's expected loss=0.4666667 P(node) =0.01811594
## class counts: 1 8 6
## probabilities: 0.067 0.533 0.400
##
## Node number 471: 18 observations
## predicted class=Medium expected loss=0.3333333 P(node) =0.02173913
## class counts: 0 6 12
## probabilities: 0.000 0.333 0.667
##
## Node number 472: 18 observations
## predicted class=Low's expected loss=0.3333333 P(node) =0.02173913
## class counts: 3 12 3
## probabilities: 0.167 0.667 0.167
##
## Node number 473: 6 observations
## predicted class=Medium expected loss=0.5 P(node) =0.007246377
## class counts: 2 1 3
## probabilities: 0.333 0.167 0.500
##
## Node number 474: 3 observations
## predicted class=Low's expected loss=0.3333333 P(node) =0.003623188
## class counts: 1 2 0
## probabilities: 0.333 0.667 0.000
##
## Node number 475: 29 observations
## predicted class=Medium expected loss=0.4137931 P(node) =0.03502415
## class counts: 2 10 17
## probabilities: 0.069 0.345 0.586
##
## Node number 686: 13 observations
## predicted class=High's expected loss=0.5384615 P(node) =0.01570048
## class counts: 6 4 3
## probabilities: 0.462 0.308 0.231
##
## Node number 687: 20 observations
## predicted class=Medium expected loss=0.35 P(node) =0.02415459
## class counts: 5 2 13
## probabilities: 0.250 0.100 0.650
The decision tree is a classification model that predicts the Log_Worldwide_Gross_Category (categorized into classes such as “Low’s,” “Medium,” and “High’s”) based on several predictor variables, including Log_production_budget_adj, ratings (e.g., PG.13, R, PG), seasonal release (Spring, Summer, Fall), and genre-related features (Main_Drama, Main_Action, etc.). The structure of the tree is based on splits of these features, with terminal nodes (leaves) providing the final predicted class.
Primary Split (Budget):
The first split is based on Log_production_budget_adj. If the adjusted production budget is less than 17.47917, the observation moves left. If it’s greater, it moves right. This indicates budget is the most critical variable determining whether a movie falls into “High’s,” “Medium,” or “Low’s” gross categories. Why it’s significant: Movies with larger budgets typically have higher grossing potential. This makes sense logically and aligns with known patterns in the film industry. Genre and Ratings:
Features like Main_Action, genre_count, and R also appear in splits, particularly when budget isn’t decisive. These secondary splits suggest that specific genres and whether a movie is rated R or otherwise help refine predictions, especially for mid-budget films. Class Distribution at Leaves:
At the terminal nodes (leaves), the probabilities show how strongly one category dominates: Node 4: Strong prediction for “High’s” (90% confidence). Node 14: Mixed category (“Low’s” dominates, but with 44.6% probability and competition from “Medium”).
Variable Importance:
Log_production_budget_adj is the most impactful variable, followed by genre_count, Main_Action, and others. Lower-ranked variables (e.g., Main_Horror, PG.13, R) appear occasionally but are less critical.
Evaluation of the Decision Tree
# Predict on the test data
test_data$predicted_categories <- predict(dt_model_categorical, newdata = test_data, type = "class")
# Confusion Matrix
library(caret)
confusion_matrix <- confusionMatrix(
data = factor(test_data$predicted_categories, levels = levels(test_data$Log_Worldwide_Gross_Category)),
reference = test_data$Log_Worldwide_Gross_Category
)
print(confusion_matrix)
## Confusion Matrix and Statistics
##
## Reference
## Prediction High's Low's Medium
## High's 84 2 27
## Low's 12 74 35
## Medium 27 38 57
##
## Overall Statistics
##
## Accuracy : 0.6039
## 95% CI : (0.551, 0.6551)
## No Information Rate : 0.3455
## P-Value [Acc > NIR] : < 2e-16
##
## Kappa : 0.4061
##
## Mcnemar's Test P-Value : 0.06388
##
## Statistics by Class:
##
## Class: High's Class: Low's Class: Medium
## Sensitivity 0.6829 0.6491 0.4790
## Specificity 0.8755 0.8058 0.7257
## Pos Pred Value 0.7434 0.6116 0.4672
## Neg Pred Value 0.8395 0.8298 0.7350
## Prevalence 0.3455 0.3202 0.3343
## Detection Rate 0.2360 0.2079 0.1601
## Detection Prevalence 0.3174 0.3399 0.3427
## Balanced Accuracy 0.7792 0.7275 0.6024
library(pROC)
# Predict probabilities for test data
predicted_probs <- predict(dt_model_categorical, newdata = test_data, type = "prob")
# Initialize lists for storing ROC and AUC values
roc_list <- list()
auc_list <- list()
categories <- levels(test_data$Log_Worldwide_Gross_Category)
# Generate ROC and AUC for each class
for (category in categories) {
# Create binary response for "One-vs-All"
true_binary <- ifelse(test_data$Log_Worldwide_Gross_Category == category, 1, 0)
predicted_probs_binary <- predicted_probs[, category]
# ROC Curve
roc_obj <- roc(true_binary, predicted_probs_binary)
roc_list[[category]] <- roc_obj
auc_list[[category]] <- auc(roc_obj)
# Plot ROC Curve
plot(roc_obj, main = paste("ROC Curve for", category), col = "blue")
abline(a = 0, b = 1, col = "gray", lty = 2) # Reference line
cat("AUC for", category, ":", auc_list[[category]], "\n")
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for High's : 0.8352176
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Low's : 0.8079237
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Medium : 0.6504627
library(dplyr)
# Convert predicted_probs to a data frame
predicted_probs <- as.data.frame(predicted_probs)
# Assign proper column names to predicted_probs
colnames(predicted_probs) <- levels(test_data$Log_Worldwide_Gross_Category)
# Add predicted probabilities to test_data
test_data <- cbind(test_data, predicted_probs)
# Ensure unique column names in test_data
names(test_data) <- make.unique(names(test_data))
# Verify no duplicate column names
if (anyDuplicated(names(test_data)) > 0) {
stop("Duplicate column names still exist in test_data!")
}
# Specify the category for analysis
category <- "High's"
# Sort test_data by predicted probabilities for the chosen category
test_data <- test_data %>%
arrange(desc(test_data[[category]]))
# Add deciles for the chosen category
test_data$decile <- ntile(test_data[[category]], 10)
# Calculate Lift Table
lift_table <- test_data %>%
group_by(decile) %>%
summarize(
total = n(),
events = sum(Log_Worldwide_Gross_Category == category),
cumulative_events = cumsum(events),
cumulative_percentage = cumulative_events / sum(Log_Worldwide_Gross_Category == category)
)
# Print Lift Table for verification
print(lift_table)
## # A tibble: 10 × 5
## decile total events cumulative_events cumulative_percentage
## <int> <int> <int> <int> <dbl>
## 1 1 36 6 6 1
## 2 2 36 2 2 1
## 3 3 36 5 5 1
## 4 4 36 6 6 1
## 5 5 36 6 6 1
## 6 6 36 5 5 1
## 7 7 35 13 13 1
## 8 8 35 16 16 1
## 9 9 35 30 30 1
## 10 10 35 34 34 1
# Plot the Lift Chart
plot(
lift_table$decile, lift_table$cumulative_percentage,
type = "o", col = "blue", xlab = "Decile", ylab = "Cumulative Gain",
main = paste("Lift Chart for", category)
)
abline(0, 0.1, col = "gray", lty = 2) # Reference random line
Confusion Matrix Overall Accuracy: The model achieved an accuracy of 60.39%. This means that approximately 60% of predictions align with the actual values. While this is above chance level, it leaves room for improvement.
Sensitivity (Recall):
High’s: The model correctly identified 68.29% of the High’s class instances. Low’s: The model correctly identified 64.91% of the Low’s class instances. Medium: The model correctly identified 47.90% of the Medium class instances. Specificity:
High’s: High specificity (87.55%), meaning it correctly rejected instances that were not High’s. Low’s and Medium: Slightly lower specificities for these classes.
Balanced Accuracy:
Balances sensitivity and specificity, with High’s being the best performing class (77.92%) and Medium lagging behind (60.24%). Lift Chart and Table Interpretation The Lift Chart for High’s indicates that the cumulative gain is flat across the deciles. This suggests that the model does not differentiate well between deciles for High’s. Ideally, you would expect a steeper gain curve early on, reflecting the model’s ability to concentrate High’s in the top deciles.
The Lift Table:
It shows the total number of predictions and actual High’s events per decile. The flatness of the cumulative percentage column confirms that the model’s ranking of High’s is not strong.
AUC for Classes
High’s (AUC = 0.835): Strong discrimination between High’s and other classes. Low’s (AUC = 0.808): Good performance but slightly lower than High’s. Medium (AUC = 0.650): Weakest discrimination, consistent with lower sensitivity and specificity.
For optimizing film investment, the best model is XGBoost.
Why? High Accuracy: Ensures precise prediction of “High’s” category, minimizing missed opportunities for high-grossing films. Feature Handling: Handles nonlinear relationships and interactions between features like budget, genre, and season effectively. Scalability: Performs well on large datasets and can be used to predict future investments reliably.
XGBoost is the most suitable for out business goal of maximizing returns on film investments.
IMDb_Rating
# Categorize IMDB_Category into buckets
data <- data %>%
mutate(IMDB_Category = case_when(
IMDb_Rating <= 6.0 ~ "Poor",
IMDb_Rating > 6.0 & IMDb_Rating <= 7.0 ~ "Good",
IMDb_Rating > 7.0 ~ "Excellent"
))
# Convert to factor
data$IMDB_Category <- as.factor(data$IMDB_Category)
# Check if the transformation is correct
table(data$IMDB_Category)
##
## Excellent Good Poor
## 273 532 379
successfully grouped movies into three distinct categories based on their IMDb ratings:
Excellent (IMDb_Rating > 7.0): Includes 273 movies that are rated highly by audiences. These movies likely have strong appeal and are regarded as high-quality productions. Good (6.0 < IMDb_Rating <= 7.0): This category, with 532 movies, represents the bulk of your dataset. These films are generally well-received but don’t reach the highest acclaim. Poor (IMDb_Rating <= 6.0): Encompassing 379 movies, this group includes those with weaker audience reception, potentially indicating lower production quality or appeal. By converting this information into a factor variable, you’ve prepared your data for categorical analysis, making it suitable for classification models. The counts indicate a reasonable distribution across the three categories, ensuring that each has sufficient representation for modeling.
This classification provides a foundation for identifying patterns and predictors of IMDb rating categories, aligning with your business objective of optimizing film investments based on audience reception.
Spliting the data
# Load necessary libraries for modeling and evaluation
library(caret)
library(glmnet) # For Ridge and LASSO regression
library(randomForest) # For Random Forest model
library(xgboost) # For Gradient Boosting model
library(Metrics) # For evaluation metrics
set.seed(123) # For reproducibility
# Split the data
train_indices <- sample(1:nrow(data), size = 0.70 * nrow(data))
train_data <- data[train_indices, ]
test_data <- data[-train_indices, ]
multinomial logistic regression model
# Load the required library
library(nnet)
# Fit the multinomial logistic regression model
multinom_model <- multinom(
IMDB_Category ~ Log_production_budget_adj + PG.13 + R + PG + G +
between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count +
Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime +
Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror +
Main_Mystery + Main_Romance + Main_Science_Fiction + Main_Thriller + Main_History,
data = train_data
)
## # weights: 84 (54 variable)
## initial value 909.650975
## iter 10 value 809.678022
## iter 20 value 793.107234
## iter 30 value 790.848413
## iter 40 value 789.935179
## iter 50 value 789.440017
## iter 60 value 789.428352
## final value 789.428307
## converged
# View model summary
summary(multinom_model)
## Call:
## multinom(formula = IMDB_Category ~ Log_production_budget_adj +
## PG.13 + R + PG + G + between_90_to_135 + Greater_than_135 +
## Spring + Summer + Fall + genre_count + Main_Action + Main_Adventure +
## Main_Animation + Main_Comedy + Main_Crime + Main_Documentary +
## Main_Drama + Main_Family + Main_Fantasy + Main_Horror + Main_Mystery +
## Main_Romance + Main_Science_Fiction + Main_Thriller + Main_History,
## data = train_data)
##
## Coefficients:
## (Intercept) Log_production_budget_adj PG.13 R PG
## Good 24.927043 -0.02282168 -10.646506 -10.806320 -10.397427
## Poor 6.274926 0.13951657 7.413371 6.879478 7.810383
## G between_90_to_135 Greater_than_135 Spring Summer
## Good -10.63351 -0.01849204 -0.9017206 0.4697993 0.55413689
## Poor 5.98092 -1.12727045 -2.6300366 0.4255005 0.05195477
## Fall genre_count Main_Action Main_Adventure Main_Animation
## Good -0.1520420 0.03402828 -12.92811 -13.36262 -14.15792
## Poor -0.5515793 -0.05509641 -13.84344 -14.08018 -16.44833
## Main_Comedy Main_Crime Main_Documentary Main_Drama Main_Family
## Good -13.27961 -13.07354 -14.55232 -14.04569 20.88705
## Poor -13.97709 -14.80415 -15.16718 -15.08242 18.84732
## Main_Fantasy Main_Horror Main_Mystery Main_Romance Main_Science_Fiction
## Good -13.07642 -10.77096 27.74199 -12.98846 -13.27668
## Poor -12.53571 -11.16733 26.58487 -14.04582 -13.73213
## Main_Thriller Main_History
## Good -13.09546 -12.89506
## Poor -14.05920 -14.59161
##
## Std. Errors:
## (Intercept) Log_production_budget_adj PG.13 R PG
## Good 1.201371 0.08792285 0.3930964 0.3621882 0.4558769
## Poor 1.387132 0.10162394 0.4563415 0.4258400 0.5151556
## G between_90_to_135 Greater_than_135 Spring Summer Fall
## Good 0.9054959 0.4149134 0.5604887 0.2948914 0.2851564 0.2573759
## Poor 1.1830739 0.4150361 0.6566664 0.3091847 0.3130964 0.2854068
## genre_count Main_Action Main_Adventure Main_Animation Main_Comedy
## Good 0.1049707 0.5091539 0.6382766 0.7244662 0.4847574
## Poor 0.1166624 0.5196586 0.6472403 0.8687498 0.4897048
## Main_Crime Main_Documentary Main_Drama Main_Family Main_Fantasy
## Good 0.5644480 1.028526 0.4607067 0.5883808 1.174404
## Poor 0.6657735 1.101435 0.4729978 0.5883808 1.085745
## Main_Horror Main_Mystery Main_Romance Main_Science_Fiction Main_Thriller
## Good 1.044573 0.5350485 0.6929191 0.8112293 0.555842
## Poor 1.046672 0.5350485 0.7277143 0.8217027 0.582732
## Main_History
## Good 0.9587806
## Poor 1.3065222
##
## Residual Deviance: 1578.857
## AIC: 1686.857
The AIC (Akaike Information Criterion) for the model is 1686.857, which is a metric to evaluate model quality. A lower AIC generally indicates a better model, but this depends on the complexity of the model and comparison with other models. The model converged successfully, as indicated by the declining deviance and the message.
Coefficients:
The coefficients represent the log odds of being in the “Good” or “Poor” categories compared to the reference category (likely “Excellent,” although this is implied and not explicitly stated in the output).
Statistical Significance:
The standard errors (SE) allow us to assess the significance of coefficients:
If the absolute value of a coefficient divided by its SE is greater than ~1.96, the coefficient is statistically significant at a 95% confidence level.
Variable Insights:
Log_production_budget_adj: Slightly negative for “Good” but positive for “Poor.” This suggests that films with higher budgets may have more extreme outcomes (either “Excellent” or “Poor”) rather than falling in the “Good” category.
Seasonality:
Summer has a positive association for both “Good” and “Poor,” indicating it might not strongly correlate with “Excellent” films. Fall has a negative association for both “Good” and “Poor,” implying it may favor “Excellent” films.
Genres:
Main_Family has a strong positive association with “Good” compared to “Excellent,” while other genres (like Main_Comedy, Main_Crime, Main_Action) have large negative coefficients, indicating a lower likelihood of being “Good” relative to “Excellent.” Main_Mystery shows a significant positive association with “Good,” while Main_Drama and Main_Romance negatively correlate with “Poor.”
Model Fit:
Residual Deviance: At 1578.857, this value reflects how well the model fits the data. Lower deviance indicates better fit, but the absolute value alone is hard to interpret without comparing it to the null model. AIC: A value of 1686.857 suggests that while the model is moderately complex, there is room for improvement. Consider comparing this AIC with other models.
Key Takeaway:
The model suggests that film attributes like budget, genres, and seasonality are significant predictors of IMDB ratings. However, some coefficients are counterintuitive or weak, and further refinement (e.g., interaction terms, feature selection) might improve predictions.
The high deviance and AIC indicate that there may be other unobserved factors influencing film ratings that are not captured by this model.
Evaluating multinomial logistic regression model
library(pROC)
# Predict on the test data
test_data$predicted_categories <- predict(multinom_model, newdata = test_data, type = "class")
# Confusion Matrix
confusion_matrix_test <- confusionMatrix(
data = factor(test_data$predicted_categories, levels = levels(test_data$IMDB_Category)),
reference = factor(test_data$IMDB_Category)
)
print(confusion_matrix_test)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Excellent Good Poor
## Excellent 26 31 9
## Good 40 94 63
## Poor 8 38 47
##
## Overall Statistics
##
## Accuracy : 0.4691
## 95% CI : (0.4163, 0.5224)
## No Information Rate : 0.4579
## P-Value [Acc > NIR] : 0.35437
##
## Kappa : 0.1448
##
## Mcnemar's Test P-Value : 0.06051
##
## Statistics by Class:
##
## Class: Excellent Class: Good Class: Poor
## Sensitivity 0.35135 0.5767 0.3950
## Specificity 0.85816 0.4663 0.8059
## Pos Pred Value 0.39394 0.4772 0.5054
## Neg Pred Value 0.83448 0.5660 0.7262
## Prevalence 0.20787 0.4579 0.3343
## Detection Rate 0.07303 0.2640 0.1320
## Detection Prevalence 0.18539 0.5534 0.2612
## Balanced Accuracy 0.60475 0.5215 0.6004
# ROC Curve and AUC for each class
roc_list_test <- list()
auc_list_test <- list()
categories <- levels(test_data$IMDB_Category)
for (category in categories) {
# Create binary response for "One-vs-All"
true_binary <- ifelse(test_data$IMDB_Category == category, 1, 0)
predicted_probs <- predict(multinom_model, newdata = test_data, type = "probs")[, category]
# ROC Curve
roc_obj_test <- roc(true_binary, predicted_probs)
roc_list_test[[category]] <- roc_obj_test
auc_list_test[[category]] <- auc(roc_obj_test)
# Plot ROC Curve for this class
plot(roc_obj_test, main = paste("ROC Curve for", category, "on Test Data"), col = "blue")
abline(a = 0, b = 1, col = "gray", lty = 2) # Reference line
cat("AUC for", category, "on Test Data:", auc_list_test[[category]], "\n")
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Excellent on Test Data: 0.6775925
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Good on Test Data: 0.5396548
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Poor on Test Data: 0.6835798
# Lift Chart for each category
for (category in categories) {
# Sort data by predicted probabilities for the chosen category
test_data <- test_data %>%
arrange(desc(predict(multinom_model, newdata = test_data, type = "probs")[, category]))
# Add deciles for the chosen category
test_data$decile <- ntile(predict(multinom_model, newdata = test_data, type = "probs")[, category], 10)
# Calculate Lift
lift_table_test <- test_data %>%
group_by(decile) %>%
summarize(
total = n(),
events = sum(IMDB_Category == category),
cumulative_events = cumsum(events),
cumulative_percentage = cumulative_events / sum(IMDB_Category == category)
)
# Plot Lift Chart for the current category
plot(
lift_table_test$decile, lift_table_test$cumulative_percentage,
type = "o", col = "blue", xlab = "Decile", ylab = "Cumulative Gain",
main = paste("Lift Chart for", category, "on Test Data")
)
abline(0, 0.1, col = "gray", lty = 2) # Reference random line
}
Accuracy & Kappa:
Accuracy: 46.91%—indicating moderate performance. Kappa: 0.1448—suggests weak agreement between predictions and actual labels.
Class Performance:
Sensitivity: Best for “Good” (57.67%), weaker for “Excellent” (35.14%) and “Poor” (39.50%). AUC: “Excellent” (0.678) and “Poor” (0.684) show moderate discrimination, while “Good” (0.540) struggles.
Lift Charts:
Flat lift curves for all categories indicate limited predictive power over random assignment.
The model’s performance is marginal and may not effectively predict IMDb categories to optimize film investments. A more robust model, such as Random Forests or XGBoost, could better capture complex patterns and improve prediction accuracy.
Random forest
# Load the required library
library(randomForest)
# Train Random Forest Model for Classification
rf_model <- randomForest(
IMDB_Category ~ Log_production_budget_adj + PG.13 + R + PG + G +
between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count +
Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime +
Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror +
Main_Mystery + Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,
data = train_data,
ntree = 500, # Number of trees
mtry = 5, # Number of predictors randomly selected at each split
importance = TRUE, # Calculate variable importance
proximity = TRUE # Enable proximity matrix for better insights
)
# View the model summary
print(rf_model)
##
## Call:
## randomForest(formula = IMDB_Category ~ Log_production_budget_adj + PG.13 + R + PG + G + between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count + Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime + Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror + Main_Mystery + Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller, data = train_data, ntree = 500, mtry = 5, importance = TRUE, proximity = TRUE)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 5
##
## OOB estimate of error rate: 51.93%
## Confusion matrix:
## Excellent Good Poor class.error
## Excellent 74 102 23 0.6281407
## Good 47 242 80 0.3441734
## Poor 21 157 82 0.6846154
# Feature Importance
importance(rf_model)
## Excellent Good Poor
## Log_production_budget_adj 5.5562595 -0.617390650 4.4737902
## PG.13 6.4343914 0.511678030 -0.9170209
## R 3.5973168 0.007512022 4.4923900
## PG 0.1780894 -1.327251776 0.2481510
## G 2.5560984 -0.144992167 -1.5442459
## between_90_to_135 8.0395917 6.164723251 8.1643830
## Greater_than_135 12.5703393 0.710064025 0.7932221
## Spring 8.6102187 -1.160330481 0.3737738
## Summer 3.5266447 5.061002726 -5.5845587
## Fall 6.9514127 -2.522729238 4.0482813
## genre_count 7.6232133 -0.849847956 9.5990737
## Main_Action 7.3320425 -1.252873721 -2.5025475
## Main_Adventure 1.1714017 -4.788950757 0.9223408
## Main_Animation 4.1085949 5.032231578 -6.8322188
## Main_Comedy 8.0625707 -2.297877811 5.0272365
## Main_Crime 1.2840318 0.028878052 5.2748762
## Main_Documentary -0.3980004 -1.155840678 -0.4537382
## Main_Drama 27.3651902 3.002580171 9.2495006
## Main_Family 3.7864771 5.528543198 -2.3869721
## Main_Fantasy 1.2341850 2.041381246 6.8349901
## Main_Horror 12.8848234 -5.974479746 12.9673035
## Main_Mystery 3.6792786 -1.003593665 -0.6639728
## Main_History 1.0892667 -0.872438390 -1.5494299
## Main_Romance -2.8136036 -1.958098679 -1.9709389
## Main_Science_Fiction -1.0251046 -2.465740800 -1.4690986
## Main_Thriller 7.6044038 -1.387403030 0.7149374
## MeanDecreaseAccuracy MeanDecreaseGini
## Log_production_budget_adj 4.16423073 71.437829
## PG.13 2.52675789 7.323162
## R 4.24099406 8.744664
## PG -0.66968922 5.234426
## G 0.29227849 1.389615
## between_90_to_135 12.75845077 11.009628
## Greater_than_135 10.03884538 6.039729
## Spring 3.63649875 8.993754
## Summer 2.46065818 8.577208
## Fall 4.76076206 9.674141
## genre_count 8.34598082 29.336847
## Main_Action -0.02015565 6.788580
## Main_Adventure -2.47071082 4.326703
## Main_Animation 3.28286937 3.226865
## Main_Comedy 5.14133968 8.135388
## Main_Crime 2.63592809 5.119881
## Main_Documentary -1.16443631 1.928174
## Main_Drama 24.12249479 16.174920
## Main_Family 4.06843841 1.700587
## Main_Fantasy 6.69061140 4.128141
## Main_Horror 9.54124123 6.732910
## Main_Mystery -0.17380793 1.614617
## Main_History -0.93868309 1.711788
## Main_Romance -3.61497776 2.977116
## Main_Science_Fiction -3.15656377 3.258371
## Main_Thriller 2.87512758 5.750493
varImpPlot(rf_model) # Plot variable importance
Model Performance:
OOB Error Rate: 51.93%, suggesting moderate classification performance.
Confusion Matrix:
“Good” had the lowest class error (34.42%), indicating it is predicted relatively better. “Excellent” and “Poor” had higher class errors (62.81% and 68.46%, respectively).
Feature Importance:
The most important predictors based on Mean Decrease Accuracy and Mean Decrease Gini are:
Main_Drama: Highest importance, strongly contributing to the model’s accuracy and split quality. Log_production_budget_adj and genre_count: Also key predictors, indicating their relevance in categorizing IMDb ratings.
Between_90_to_135: Demonstrated high importance, suggesting runtime as a significant factor. Less influential features include Main_Science_Fiction and Main_Romance.
Variable Influence:
The importance plot emphasizes “Main_Drama” and “Log_production_budget_adj” as pivotal features, highlighting their correlation with IMDb rating categories.
Seasonal variables (e.g., Spring, Summer, Fall) and genre-specific variables (e.g., Main_Comedy, Main_Horror) show varied but moderate impact.
The Random Forest model is better suited for identifying patterns across categories compared to Multinomial Logistic Regression. Its performance indicates potential for predicting IMDb ratings, particularly when focused on key drivers like production budget, genre, and drama. Further optimization, like tuning hyperparameters, may improve predictive accuracy.
Evaluating Random Forest model
# Predict on the test data
test_data$rf_predicted_categories <- predict(rf_model, newdata = test_data)
# Confusion Matrix
rf_confusion_matrix_test <- confusionMatrix(
data = factor(test_data$rf_predicted_categories, levels = levels(test_data$IMDB_Category)),
reference = factor(test_data$IMDB_Category)
)
print(rf_confusion_matrix_test)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Excellent Good Poor
## Excellent 25 28 10
## Good 44 104 64
## Poor 5 31 45
##
## Overall Statistics
##
## Accuracy : 0.4888
## 95% CI : (0.4357, 0.542)
## No Information Rate : 0.4579
## P-Value [Acc > NIR] : 0.1320677
##
## Kappa : 0.168
##
## Mcnemar's Test P-Value : 0.0008202
##
## Statistics by Class:
##
## Class: Excellent Class: Good Class: Poor
## Sensitivity 0.33784 0.6380 0.3782
## Specificity 0.86525 0.4404 0.8481
## Pos Pred Value 0.39683 0.4906 0.5556
## Neg Pred Value 0.83276 0.5903 0.7309
## Prevalence 0.20787 0.4579 0.3343
## Detection Rate 0.07022 0.2921 0.1264
## Detection Prevalence 0.17697 0.5955 0.2275
## Balanced Accuracy 0.60154 0.5392 0.6131
# ROC Curve and AUC for each class
rf_roc_list_test <- list()
rf_auc_list_test <- list()
categories <- levels(test_data$IMDB_Category)
print(categories)
## [1] "Excellent" "Good" "Poor"
predicted_probs <- predict(rf_model, newdata = test_data, type = "prob")
print(colnames(predicted_probs))
## [1] "Excellent" "Good" "Poor"
categories <- colnames(predicted_probs)
for (category in categories) {
# Create binary response for "One-vs-All"
true_binary <- ifelse(test_data$IMDB_Category == category, 1, 0)
# Extract predicted probabilities for the current category
rf_predicted_probs <- predicted_probs[, category] # Use the inspected column name
# ROC Curve
rf_roc_obj_test <- roc(true_binary, rf_predicted_probs)
rf_roc_list_test[[category]] <- rf_roc_obj_test
rf_auc_list_test[[category]] <- auc(rf_roc_obj_test)
# Plot ROC Curve for this class
plot(rf_roc_obj_test, main = paste("ROC Curve for", category, "on Test Data"), col = "red")
abline(a = 0, b = 1, col = "gray", lty = 2) # Reference line
cat("AUC for", category, "on Test Data:", rf_auc_list_test[[category]], "\n")
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Excellent on Test Data: 0.7138442
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Good on Test Data: 0.5467275
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Poor on Test Data: 0.6822324
# Lift Chart for each category
for (category in categories) {
# Sort data by predicted probabilities for the chosen category
test_data <- test_data %>%
arrange(desc(predict(rf_model, newdata = test_data, type = "prob")[, category]))
# Add deciles for the chosen category
test_data$rf_decile <- ntile(predict(rf_model, newdata = test_data, type = "prob")[, category], 10)
# Calculate Lift
rf_lift_table_test <- test_data %>%
group_by(rf_decile) %>%
summarize(
total = n(),
events = sum(IMDB_Category == category),
cumulative_events = cumsum(events),
cumulative_percentage = cumulative_events / sum(IMDB_Category == category)
)
# Plot Lift Chart for the current category
plot(
rf_lift_table_test$rf_decile, rf_lift_table_test$cumulative_percentage,
type = "o", col = "red", xlab = "Decile", ylab = "Cumulative Gain",
main = paste("Lift Chart for", category, "on Test Data")
)
abline(0, 0.1, col = "gray", lty = 2) # Reference random line
}
Confusion Matrix Performance:
Overall accuracy of the Random Forest model on test data is 48.88%, with a Kappa statistic of 0.168, indicating a slight improvement over random classification but with room for better performance.
Class-specific sensitivity and specificity:
Excellent: Sensitivity = 33.78%, Specificity = 86.53%, PPV = 39.68%. Good: Sensitivity = 63.80%, Specificity = 44.04%, PPV = 49.06%. Poor: Sensitivity = 37.82%, Specificity = 84.81%, PPV = 55.56%.
Area Under the Curve (AUC):
Excellent: AUC = 0.714, indicating reasonable model performance for this category. Good: AUC = 0.547, suggesting limited model discrimination. Poor: AUC = 0.682, showing moderate classification ability.
Variable Importance:
The most important predictors influencing IMDb categories are: Main_Drama (highest Mean Decrease in Accuracy and Gini). Log_production_budget_adj. Between_90_to_135 and Greater_than_135 for movie runtime. Genre Count and specific genres like Main_Horror and Main_Comedy.
Insights from ROC Curves:
For Excellent, the ROC curve and AUC highlight better classification ability than other categories. Good shows almost random classification, with AUC close to 0.5. Poor is moderately well-classified, but improvements can still be made.
The Random Forest model offers slightly better performance for predicting Excellent-rated films than other categories, with decent AUC (0.714). However, the overall model accuracy and class-specific performance metrics indicate the need for further optimization or exploration of alternative models. Feature engineering or ensemble methods might enhance predictive power, especially for the challenging categories like “Good.”
XGBoost
# Load necessary libraries
library(xgboost)
# Prepare data for XGBoost
x_train <- model.matrix(
IMDB_Category ~ Log_production_budget_adj + PG.13 + R + PG + G +
between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count +
Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime +
Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror +
Main_Mystery + Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,
data = train_data
)[, -1] # Remove intercept column
# Encode the target variable as numeric (0-based for XGBoost)
y_train <- as.numeric(train_data$IMDB_Category) - 1
# Train the XGBoost model for multi-class classification
xgb_model <- xgboost(
data = as.matrix(x_train),
label = y_train,
objective = "multi:softprob", # Multi-class classification
num_class = length(levels(train_data$IMDB_Category)), # Number of classes
nrounds = 100, # Number of boosting rounds
max_depth = 6, # Tree depth
eta = 0.1, # Learning rate
colsample_bytree = 0.8, # Subsample ratio of columns
verbose = 1 # Print training progress
)
## [1] train-mlogloss:1.074479
## [2] train-mlogloss:1.052844
## [3] train-mlogloss:1.027775
## [4] train-mlogloss:1.010434
## [5] train-mlogloss:0.989777
## [6] train-mlogloss:0.972292
## [7] train-mlogloss:0.956994
## [8] train-mlogloss:0.941578
## [9] train-mlogloss:0.926042
## [10] train-mlogloss:0.913551
## [11] train-mlogloss:0.902381
## [12] train-mlogloss:0.888798
## [13] train-mlogloss:0.876363
## [14] train-mlogloss:0.865105
## [15] train-mlogloss:0.853369
## [16] train-mlogloss:0.845262
## [17] train-mlogloss:0.837227
## [18] train-mlogloss:0.829368
## [19] train-mlogloss:0.820469
## [20] train-mlogloss:0.813455
## [21] train-mlogloss:0.806492
## [22] train-mlogloss:0.798595
## [23] train-mlogloss:0.792434
## [24] train-mlogloss:0.785758
## [25] train-mlogloss:0.777530
## [26] train-mlogloss:0.772429
## [27] train-mlogloss:0.767649
## [28] train-mlogloss:0.762378
## [29] train-mlogloss:0.755696
## [30] train-mlogloss:0.751969
## [31] train-mlogloss:0.747556
## [32] train-mlogloss:0.744093
## [33] train-mlogloss:0.739338
## [34] train-mlogloss:0.732562
## [35] train-mlogloss:0.728322
## [36] train-mlogloss:0.723797
## [37] train-mlogloss:0.720946
## [38] train-mlogloss:0.717706
## [39] train-mlogloss:0.714799
## [40] train-mlogloss:0.711286
## [41] train-mlogloss:0.707176
## [42] train-mlogloss:0.705031
## [43] train-mlogloss:0.701661
## [44] train-mlogloss:0.698750
## [45] train-mlogloss:0.694957
## [46] train-mlogloss:0.691874
## [47] train-mlogloss:0.689633
## [48] train-mlogloss:0.687683
## [49] train-mlogloss:0.683668
## [50] train-mlogloss:0.681192
## [51] train-mlogloss:0.679446
## [52] train-mlogloss:0.677509
## [53] train-mlogloss:0.675144
## [54] train-mlogloss:0.673366
## [55] train-mlogloss:0.670637
## [56] train-mlogloss:0.668569
## [57] train-mlogloss:0.665969
## [58] train-mlogloss:0.663650
## [59] train-mlogloss:0.660852
## [60] train-mlogloss:0.658927
## [61] train-mlogloss:0.655505
## [62] train-mlogloss:0.652286
## [63] train-mlogloss:0.650891
## [64] train-mlogloss:0.648573
## [65] train-mlogloss:0.644471
## [66] train-mlogloss:0.641360
## [67] train-mlogloss:0.640229
## [68] train-mlogloss:0.638847
## [69] train-mlogloss:0.636355
## [70] train-mlogloss:0.634092
## [71] train-mlogloss:0.631408
## [72] train-mlogloss:0.629970
## [73] train-mlogloss:0.627481
## [74] train-mlogloss:0.624073
## [75] train-mlogloss:0.621771
## [76] train-mlogloss:0.619510
## [77] train-mlogloss:0.614699
## [78] train-mlogloss:0.612588
## [79] train-mlogloss:0.610307
## [80] train-mlogloss:0.606432
## [81] train-mlogloss:0.603806
## [82] train-mlogloss:0.601372
## [83] train-mlogloss:0.598331
## [84] train-mlogloss:0.596270
## [85] train-mlogloss:0.594136
## [86] train-mlogloss:0.592415
## [87] train-mlogloss:0.590526
## [88] train-mlogloss:0.588512
## [89] train-mlogloss:0.586801
## [90] train-mlogloss:0.585678
## [91] train-mlogloss:0.584121
## [92] train-mlogloss:0.582822
## [93] train-mlogloss:0.581860
## [94] train-mlogloss:0.577679
## [95] train-mlogloss:0.576566
## [96] train-mlogloss:0.574965
## [97] train-mlogloss:0.573818
## [98] train-mlogloss:0.572281
## [99] train-mlogloss:0.568154
## [100] train-mlogloss:0.567374
# Feature importance
importance <- xgb.importance(feature_names = colnames(x_train), model = xgb_model)
print(importance)
## Feature Gain Cover Frequency
## 1: Log_production_budget_adj 0.4233138823 0.466293781 0.415887850
## 2: genre_count 0.1072688206 0.060595627 0.121183801
## 3: Main_Drama 0.0767843067 0.036568122 0.023520249
## 4: Fall 0.0413833587 0.021744791 0.045638629
## 5: between_90_to_135 0.0378092282 0.033511624 0.035669782
## 6: R 0.0326238949 0.017379619 0.029127726
## 7: Main_Horror 0.0299443627 0.043243943 0.022897196
## 8: Spring 0.0268838237 0.019189010 0.032866044
## 9: PG.13 0.0266688630 0.010705740 0.039096573
## 10: Summer 0.0255862226 0.033776733 0.044704050
## 11: Main_Comedy 0.0238457573 0.017231783 0.025545171
## 12: Greater_than_135 0.0207961257 0.027010405 0.020404984
## 13: Main_Thriller 0.0179591402 0.023251360 0.018068536
## 14: PG 0.0174209573 0.010606143 0.020560748
## 15: Main_Action 0.0155807074 0.007177278 0.018380062
## 16: Main_Crime 0.0152833163 0.026108310 0.014485981
## 17: Main_Fantasy 0.0123369093 0.024487168 0.008878505
## 18: Main_Animation 0.0105049052 0.023966499 0.012149533
## 19: Main_Adventure 0.0101526928 0.007793446 0.012616822
## 20: Main_Family 0.0062642320 0.026925637 0.007943925
## 21: Main_Mystery 0.0058649216 0.029570608 0.009968847
## 22: Main_Science_Fiction 0.0043164510 0.009034000 0.005919003
## 23: G 0.0038079085 0.003121660 0.003271028
## 24: Main_Documentary 0.0034662981 0.005663165 0.003894081
## 25: Main_History 0.0031460959 0.007969614 0.004517134
## 26: Main_Romance 0.0009868179 0.007073935 0.002803738
## Feature Gain Cover Frequency
xgb.plot.importance(importance)
Feature Importance:
The most important features based on the gain are:
Log_production_budget_adj: Contributes significantly to predictions with the highest gain (42.33%). Genre count: Second most important, indicating the diversity of genres in a film as a strong predictor. Main_Drama, Fall, and between_90_to_135 (runtime) are also notable contributors. Features like Main_Romance and Main_History exhibit negligible influence, suggesting limited predictive power for IMDb categories.
Gain, Cover, and Frequency Insights:
Gain measures the contribution of each feature to improving accuracy. Log_production_budget_adj dominates, followed by genre_count. Cover indicates how frequently a feature appears in the model. While Log_production_budget_adj has high cover, features like Main_Horror and between_90_to_135 also show relatively frequent use in trees. Frequency reflects how often a feature is selected for splitting. Features like Log_production_budget_adj and genre_count are split on frequently.
Performance Indicators:
Feature importance visualization indicates that Log_production_budget_adj and genre_count significantly influence predictions, aligning with domain expectations (e.g., budget and genre diversity affecting IMDb scores).
Lift and Predictive Analysis:
The dominance of features related to budget and genre diversity suggests that these metrics are critical for predicting IMDb performance categories (Excellent, Good, Poor). Runtime and seasonal release windows (e.g., Fall, Spring, Summer) also provide meaningful predictive insights.
The XGBoost model strongly prioritizes production budget, genre diversity, and runtime as the most critical predictors for IMDb rating categories. These insights align with intuitive industry expectations, emphasizing the importance of resource allocation and content variety in determining movie success.
XGBoost Model Evaluation
# Prepare test data matrix
x_test <- model.matrix(
IMDB_Category ~ Log_production_budget_adj + PG.13 + R + PG + G +
between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count +
Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime +
Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror +
Main_Mystery + Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,
data = test_data
)[, -1]
# Encode test labels as numeric (0-based)
y_test <- as.numeric(test_data$IMDB_Category) - 1
# Reshape predicted probabilities into a matrix
pred_matrix <- matrix(pred_probs, nrow = nrow(x_test), byrow = TRUE)
# Get predicted classes (1-based indexing)
pred_classes <- max.col(pred_matrix) - 1 # Convert from 1-based to 0-based indexing for compatibility
# Convert to factor using the original levels of the dependent variable
test_data$predicted_categories <- factor(pred_classes, labels = levels(test_data$IMDB_Category))
cat("Rows in predicted categories:", length(test_data$predicted_categories), "\n")
## Rows in predicted categories: 356
cat("Rows in test data:", nrow(test_data), "\n")
## Rows in test data: 356
library(caret)
confusion_matrix <- confusionMatrix(
data = test_data$predicted_categories,
reference = test_data$IMDB_Category
)
print(confusion_matrix)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Excellent Good Poor
## Excellent 33 62 29
## Good 26 51 27
## Poor 15 50 63
##
## Overall Statistics
##
## Accuracy : 0.4129
## 95% CI : (0.3613, 0.466)
## No Information Rate : 0.4579
## P-Value [Acc > NIR] : 0.9607
##
## Kappa : 0.1285
##
## Mcnemar's Test P-Value : 9.302e-06
##
## Statistics by Class:
##
## Class: Excellent Class: Good Class: Poor
## Sensitivity 0.4459 0.3129 0.5294
## Specificity 0.6773 0.7254 0.7257
## Pos Pred Value 0.2661 0.4904 0.4922
## Neg Pred Value 0.8233 0.5556 0.7544
## Prevalence 0.2079 0.4579 0.3343
## Detection Rate 0.0927 0.1433 0.1770
## Detection Prevalence 0.3483 0.2921 0.3596
## Balanced Accuracy 0.5616 0.5191 0.6276
# ROC Curve and AUC for each class
library(pROC)
# Initialize lists to store results
roc_list <- list()
auc_list <- list()
# Iterate through categories
for (category in categories) {
# Create binary response for "One-vs-All"
true_binary <- ifelse(test_data$IMDB_Category == category, 1, 0)
# Extract predicted probabilities for the current category
predicted_probs <- pred_matrix[, which(categories == category)]
# ROC Curve
roc_obj <- roc(true_binary, predicted_probs)
roc_list[[category]] <- roc_obj
auc_list[[category]] <- auc(roc_obj)
# Plot ROC Curve
plot(roc_obj, main = paste("ROC Curve for", category), col = "blue")
abline(a = 0, b = 1, col = "gray", lty = 2) # Reference line
cat("AUC for", category, ":", auc_list[[category]], "\n")
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Excellent : 0.5472733
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Good : 0.5122223
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Poor : 0.6884551
# Lift Chart for each category
library(dplyr)
for (category in categories) {
# Sort data by predicted probabilities for the chosen category
test_data <- test_data %>%
arrange(desc(pred_matrix[, which(categories == category)]))
# Add deciles for the chosen category
test_data$decile <- ntile(pred_matrix[, which(categories == category)], 10)
# Calculate Lift
lift_table <- test_data %>%
group_by(decile) %>%
summarize(
total = n(),
events = sum(IMDB_Category == category),
cumulative_events = cumsum(events),
cumulative_percentage = cumulative_events / sum(IMDB_Category == category)
)
# Plot Lift Chart
plot(
lift_table$decile, lift_table$cumulative_percentage,
type = "o", col = "blue", xlab = "Decile", ylab = "Cumulative Gain",
main = paste("Lift Chart for", category)
)
abline(0, 0.1, col = "gray", lty = 2) # Reference line
}
The accuracy of the XGBoost model is 41.29%, which is relatively low and marginally above random chance. The Kappa statistic (0.1285) reflects slight agreement beyond chance, suggesting limited effectiveness for this classification task. No Information Rate (NIR) is 45.79%, indicating the proportion of the majority class in the dataset. The model’s performance is below NIR, highlighting its underperformance.
Class-Specific Metrics:
Excellent:
Sensitivity (Recall): 44.59% of “Excellent” films were correctly classified. Specificity: 67.73%, indicating moderate success in rejecting non-“Excellent” categories. AUC: 0.547, suggesting minimal ability to differentiate “Excellent” from others.
Good: Sensitivity: 31.29%, indicating significant misclassification. AUC: 0.512, close to random guessing.
Poor: Sensitivity: 52.94%, indicating better-than-average identification. AUC: 0.688, showing moderate discrimination power for “Poor” films.
Confusion Matrix Insights:
High confusion among classes: Many “Good” films were misclassified as “Excellent” or “Poor.” “Poor” has the highest detection rate (52.94%) but is still far from ideal.
ROC Curves: “Poor” class has the highest AUC (0.688), showing reasonable discrimination. Other classes have AUCs near 0.5, reflecting limited predictive capacity.
The XGBoost model struggles with this multi-class classification task, particularly for distinguishing “Excellent” and “Good” IMDb categories. While it performs slightly better for “Poor” films
Polynomial Logistic Regression
# Fit a polynomial logistic regression model
library(nnet)
polynomial_logistic_model <- multinom(
IMDB_Category ~ poly(Log_production_budget_adj, degree = 2) +
PG.13 + R + PG + G + between_90_to_135 + Greater_than_135 +
Spring + Summer + Fall + genre_count + Main_Action + Main_Adventure +
Main_Animation + Main_Comedy + Main_Crime + Main_Documentary +
Main_Drama + Main_Family + Main_Fantasy + Main_Horror + Main_Mystery + Main_History +
Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,
data = train_data,
maxit = 1000 # Increase maximum iterations if convergence is slow
)
## # weights: 87 (56 variable)
## initial value 909.650975
## iter 10 value 813.084800
## iter 20 value 792.425301
## iter 30 value 789.841615
## iter 40 value 788.661010
## iter 50 value 788.268289
## iter 60 value 788.130398
## iter 70 value 788.128120
## final value 788.128057
## converged
# View model summary
summary(polynomial_logistic_model)
## Call:
## multinom(formula = IMDB_Category ~ poly(Log_production_budget_adj,
## degree = 2) + PG.13 + R + PG + G + between_90_to_135 + Greater_than_135 +
## Spring + Summer + Fall + genre_count + Main_Action + Main_Adventure +
## Main_Animation + Main_Comedy + Main_Crime + Main_Documentary +
## Main_Drama + Main_Family + Main_Fantasy + Main_Horror + Main_Mystery +
## Main_History + Main_History + Main_Romance + Main_Science_Fiction +
## Main_Thriller, data = train_data, maxit = 1000)
##
## Coefficients:
## (Intercept) poly(Log_production_budget_adj, degree = 2)1
## Good 23.988125 0.1377281
## Poor 8.765919 6.0120151
## poly(Log_production_budget_adj, degree = 2)2 PG.13 R
## Good 3.6679933 -11.08989 -11.226961
## Poor -0.0393772 6.23762 5.711377
## PG G between_90_to_135 Greater_than_135 Spring
## Good -10.843189 -11.069835 -0.015003 -0.9716645 0.4621885
## Poor 6.634967 4.813758 -1.142498 -2.6539811 0.4246470
## Summer Fall genre_count Main_Action Main_Adventure
## Good 0.53561307 -0.1540647 0.01754914 -11.92844 -12.43651
## Poor 0.06041035 -0.5552578 -0.06005683 -12.78934 -13.03532
## Main_Animation Main_Comedy Main_Crime Main_Documentary Main_Drama
## Good -13.20400 -12.21288 -12.01493 -13.50688 -12.99334
## Poor -15.42168 -12.91627 -13.73824 -14.09629 -14.02027
## Main_Family Main_Fantasy Main_Horror Main_Mystery Main_History
## Good 14.88201 -12.12609 -9.705199 18.30772 -11.76825
## Poor 12.82344 -11.48261 -10.090125 17.13281 -13.48952
## Main_Romance Main_Science_Fiction Main_Thriller
## Good -11.95606 -12.29441 -12.01625
## Poor -13.01636 -12.68923 -12.99570
##
## Std. Errors:
## (Intercept) poly(Log_production_budget_adj, degree = 2)1
## Good 0.5378852 3.440871
## Poor 0.5775088 3.969484
## poly(Log_production_budget_adj, degree = 2)2 PG.13 R PG
## Good 2.963938 0.3063993 0.3151366 0.3686127
## Poor 3.620412 0.3615220 0.3733576 0.4143836
## G between_90_to_135 Greater_than_135 Spring Summer Fall
## Good 0.8382865 0.4163319 0.5655286 0.2951358 0.2860115 0.2576835
## Poor 1.1039891 0.4169914 0.6593857 0.3094216 0.3138867 0.2854426
## genre_count Main_Action Main_Adventure Main_Animation Main_Comedy
## Good 0.1058137 0.4936114 0.6297194 0.7144038 0.4783541
## Poor 0.1173153 0.5023833 0.6366743 0.8581629 0.4830573
## Main_Crime Main_Documentary Main_Drama Main_Family Main_Fantasy
## Good 0.5566858 1.034188 0.4595046 0.5886378 1.169425
## Poor 0.6589084 1.112223 0.4721036 0.5886378 1.080924
## Main_Horror Main_Mystery Main_History Main_Romance Main_Science_Fiction
## Good 1.045242 0.5341723 0.9645703 0.6941953 0.8039164
## Poor 1.047749 0.5341723 1.3070485 0.7268909 0.8143117
## Main_Thriller
## Good 0.5518813
## Poor 0.5792322
##
## Residual Deviance: 1576.256
## AIC: 1688.256
The polynomial logistic regression model incorporates a quadratic transformation (poly) for the variable Log_production_budget_adj. This allows the model to capture potential nonlinear relationships between production budget and IMDb categories. The model successfully converged, achieving a final residual deviance of 1576.256 and an AIC (Akaike Information Criterion) of 1688.256. These metrics suggest slightly improved fit compared to a simpler multinomial logistic regression model without polynomial terms.
Coefficients Analysis:
Intercepts: Represent baseline probabilities for each IMDb category when all predictors are zero.
Log_production_budget_adj: For the “Good” category, the quadratic term is significant, with positive coefficients, indicating a nonlinear relationship with the budget. For the “Poor” category, the linear term is stronger, suggesting that a higher budget correlates negatively with being classified as “Poor.”
Genre & Seasonal Effects: Main_Family and Main_Mystery genres show high positive coefficients for the “Good” category, indicating these genres might increase the likelihood of higher IMDb ratings. Seasonal effects (Spring, Summer, etc.) appear relatively weak overall, but their inclusion contributes slightly to the variance explained.
Rating Classifications (PG, R, PG-13): A strong negative relationship exists between “Good” films and the PG-13 rating, while the R rating has a significant positive relationship with “Poor.”
Fit and Complexity:
The addition of the polynomial term has slightly reduced residual deviance compared to the simpler logistic regression model. However, the improvement is marginal, suggesting that the added complexity might not provide substantial benefit in predictive accuracy. Coefficients for some genres and ratings (e.g., Main_History, G, PG) have relatively high standard errors, indicating less certainty in their contribution to the model.
Observations:
Residual Deviance (1576.256): Reflects the remaining unexplained variance. A lower deviance compared to the simpler multinomial model indicates better fit. AIC (1688.256): While AIC decreased slightly compared to the base model, the difference is small, and overfitting might be a concern with added polynomial terms. Quadratic Relationship in Budget: The model suggests that both very low and very high production budgets might associate with different rating categories, though further testing (e.g., visualization) would validate this interpretation.
Evaluating Polynomial Logistic Regression
# Predict class probabilities on test data
predicted_probabilities <- predict(polynomial_logistic_model, newdata = test_data, type = "probs")
# Predict classes on test data
predicted_classes <- predict(polynomial_logistic_model, newdata = test_data, type = "class")
# Convert predicted classes to a factor
predicted_classes <- factor(predicted_classes, levels = levels(test_data$IMDB_Category))
# Ensure true labels are factors
true_classes <- factor(test_data$IMDB_Category, levels = levels(test_data$IMDB_Category))
library(caret)
# Compute confusion matrix
confusion_matrix <- confusionMatrix(predicted_classes, true_classes)
# Print confusion matrix
print(confusion_matrix)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Excellent Good Poor
## Excellent 25 33 9
## Good 41 93 66
## Poor 8 37 44
##
## Overall Statistics
##
## Accuracy : 0.4551
## 95% CI : (0.4025, 0.5084)
## No Information Rate : 0.4579
## P-Value [Acc > NIR] : 0.56280
##
## Kappa : 0.1212
##
## Mcnemar's Test P-Value : 0.02813
##
## Statistics by Class:
##
## Class: Excellent Class: Good Class: Poor
## Sensitivity 0.33784 0.5706 0.3697
## Specificity 0.85106 0.4456 0.8101
## Pos Pred Value 0.37313 0.4650 0.4944
## Neg Pred Value 0.83045 0.5513 0.7191
## Prevalence 0.20787 0.4579 0.3343
## Detection Rate 0.07022 0.2612 0.1236
## Detection Prevalence 0.18820 0.5618 0.2500
## Balanced Accuracy 0.59445 0.5081 0.5899
library(pROC)
# Initialize lists for ROC and AUC
roc_list <- list()
auc_list <- list()
# One-vs-All ROC and AUC
for (category in levels(true_classes)) {
# Create binary labels for the current category
true_binary <- ifelse(true_classes == category, 1, 0)
# Get predicted probabilities for the current category
predicted_probs_binary <- predicted_probabilities[, category]
# Skip if true_binary has fewer than two levels
if (length(unique(true_binary)) < 2) {
cat("Skipping ROC for", category, "due to insufficient data.\n")
next
}
# Compute ROC
roc_obj <- roc(true_binary, predicted_probs_binary)
roc_list[[category]] <- roc_obj
auc_list[[category]] <- auc(roc_obj)
# Plot ROC curve
plot(roc_obj, main = paste("ROC Curve for", category), col = "blue")
abline(a = 0, b = 1, col = "gray", lty = 2) # Reference line
cat("AUC for", category, ":", auc_list[[category]], "\n")
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Excellent : 0.6811865
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Good : 0.5362853
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Poor : 0.6851044
library(dplyr)
# Specify the category for Lift Chart (e.g., "Good")
category <- "Good"
# Ensure predicted probabilities are in a data frame
predicted_probs_df <- as.data.frame(predicted_probabilities)
# Combine test data with predicted probabilities
test_data_lift <- test_data %>%
mutate(predicted_prob = predicted_probs_df[[category]])
# Sort by predicted probabilities
test_data_lift <- test_data_lift %>%
arrange(desc(predicted_prob))
# Add deciles
test_data_lift$decile <- ntile(test_data_lift$predicted_prob, 10)
# Calculate Lift
lift_table <- test_data_lift %>%
group_by(decile) %>%
summarize(
total = n(),
events = sum(IMDB_Category == category), # Adjust for binary or multi-class context
cumulative_events = cumsum(events),
cumulative_percentage = cumulative_events / sum(IMDB_Category == category)
)
# Plot Lift Chart
plot(
lift_table$decile, lift_table$cumulative_percentage,
type = "o", col = "blue", xlab = "Decile", ylab = "Cumulative Gain",
main = paste("Lift Chart for", category)
)
abline(a = 0, b = 0.1, col = "gray", lty = 2) # Reference line
Confusion Matrix Overview:
Accuracy: 45.51% (indicating that less than half of the test data was correctly classified). Kappa: 0.1212 (a low value, suggesting weak agreement between predicted and actual classes beyond chance). McNemar’s Test P-Value: 0.02813 (significant, implying potential imbalance or systematic differences in misclassification).
Class-Level Performance:
Excellent:
Sensitivity: 33.78% (low, many “Excellent” cases are misclassified). Specificity: 85.11% (good, few non-“Excellent” cases are classified as “Excellent”). AUC: 0.6812 (moderate discriminatory power for “Excellent”).
Good:
Sensitivity: 57.06% (moderate, but nearly half of “Good” cases are misclassified). Specificity: 44.56% (low, many non-“Good” cases are misclassified as “Good”). AUC: 0.5363 (poor discriminatory power for “Good”).
Poor:
Sensitivity: 36.97% (low, many “Poor” cases are misclassified). Specificity: 81.01% (good, relatively few non-“Poor” cases are classified as “Poor”). AUC: 0.6851 (moderate discriminatory power for “Poor”).
Decision tree
# Load required libraries
library(rpart)
library(rpart.plot)
# Train Decision Tree for Classification
dt_model_categorical <- rpart(
IMDB_Category ~ Log_production_budget_adj + PG.13 + R + PG + G +
between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count +
Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime +
Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror +
Main_Mystery + Main_Romance + Main_Science_Fiction + Main_Thriller,
data = train_data,
method = "class", # Classification tree
control = rpart.control(
cp = 0.005, # Smaller complexity parameter for more splits
maxdepth = 10, # Allow deeper trees
minsplit = 10 # Minimum observations required to split
)
)
# Plot the decision tree
rpart.plot(
dt_model_categorical,
type = 3, # Show splits and probabilities
extra = 101, # Display n, % observations, and class probabilities
under = TRUE, # Show text under the nodes
fallen.leaves = TRUE, # Spread the leaves horizontally
box.palette = "Blues" # Color scheme for the boxes
)
# Print a summary of the model
summary(dt_model_categorical)
## Call:
## rpart(formula = IMDB_Category ~ Log_production_budget_adj + PG.13 +
## R + PG + G + between_90_to_135 + Greater_than_135 + Spring +
## Summer + Fall + genre_count + Main_Action + Main_Adventure +
## Main_Animation + Main_Comedy + Main_Crime + Main_Documentary +
## Main_Drama + Main_Family + Main_Fantasy + Main_Horror + Main_Mystery +
## Main_Romance + Main_Science_Fiction + Main_Thriller, data = train_data,
## method = "class", control = rpart.control(cp = 0.005, maxdepth = 10,
## minsplit = 10))
## n= 828
##
## CP nsplit rel error xerror xstd
## 1 0.035947712 0 1.0000000 1.0000000 0.03115959
## 2 0.021786492 2 0.9281046 0.9542484 0.03129259
## 3 0.008714597 5 0.8605664 0.9215686 0.03133790
## 4 0.006535948 6 0.8518519 0.9084967 0.03134448
## 5 0.005446623 7 0.8453159 0.9389978 0.03131887
## 6 0.005083515 13 0.8082789 0.9302832 0.03132985
## 7 0.005000000 19 0.7777778 0.9346405 0.03132472
##
## Variable importance
## Log_production_budget_adj Main_Drama genre_count
## 27 19 11
## between_90_to_135 R Greater_than_135
## 8 7 6
## PG.13 Main_Horror Fall
## 5 4 3
## Summer Main_Crime Spring
## 3 2 2
## PG Main_Thriller
## 1 1
##
## Node number 1: 828 observations, complexity param=0.03594771
## predicted class=Good expected loss=0.5543478 P(node) =1
## class counts: 199 369 260
## probabilities: 0.240 0.446 0.314
## left son=2 (221 obs) right son=3 (607 obs)
## Primary splits:
## Main_Drama < 0.5 to the right, improve=16.610020, (0 missing)
## Greater_than_135 < 0.5 to the right, improve= 6.255695, (0 missing)
## Fall < 0.5 to the right, improve= 5.406866, (0 missing)
## Main_Horror < 0.5 to the left, improve= 5.275756, (0 missing)
## R < 0.5 to the right, improve= 3.838123, (0 missing)
## Surrogate splits:
## Log_production_budget_adj < 13.55018 to the left, agree=0.738, adj=0.018, (0 split)
## Greater_than_135 < 0.5 to the right, agree=0.736, adj=0.009, (0 split)
##
## Node number 2: 221 observations, complexity param=0.03594771
## predicted class=Excellent expected loss=0.5701357 P(node) =0.2669082
## class counts: 95 83 43
## probabilities: 0.430 0.376 0.195
## left son=4 (141 obs) right son=5 (80 obs)
## Primary splits:
## Log_production_budget_adj < 16.21795 to the right, improve=6.904754, (0 missing)
## genre_count < 1.5 to the left, improve=2.962116, (0 missing)
## Spring < 0.5 to the left, improve=2.795708, (0 missing)
## Greater_than_135 < 0.5 to the right, improve=2.789332, (0 missing)
## Fall < 0.5 to the right, improve=2.387155, (0 missing)
## Surrogate splits:
## Summer < 0.5 to the left, agree=0.665, adj=0.075, (0 split)
##
## Node number 3: 607 observations, complexity param=0.02178649
## predicted class=Good expected loss=0.5288303 P(node) =0.7330918
## class counts: 104 286 217
## probabilities: 0.171 0.471 0.357
## left son=6 (289 obs) right son=7 (318 obs)
## Primary splits:
## R < 0.5 to the right, improve=5.952452, (0 missing)
## PG.13 < 0.5 to the left, improve=4.376010, (0 missing)
## Main_Crime < 0.5 to the right, improve=3.811016, (0 missing)
## genre_count < 1.5 to the right, improve=3.364676, (0 missing)
## Main_Horror < 0.5 to the left, improve=2.925027, (0 missing)
## Surrogate splits:
## PG.13 < 0.5 to the left, agree=0.857, adj=0.699, (0 split)
## Log_production_budget_adj < 17.72691 to the left, agree=0.623, adj=0.208, (0 split)
## PG < 0.5 to the left, agree=0.608, adj=0.176, (0 split)
## Main_Crime < 0.5 to the right, agree=0.568, adj=0.093, (0 split)
## Main_Horror < 0.5 to the right, agree=0.554, adj=0.062, (0 split)
##
## Node number 4: 141 observations, complexity param=0.005446623
## predicted class=Excellent expected loss=0.4964539 P(node) =0.1702899
## class counts: 71 38 32
## probabilities: 0.504 0.270 0.227
## left son=8 (35 obs) right son=9 (106 obs)
## Primary splits:
## genre_count < 1.5 to the left, improve=4.003980, (0 missing)
## Spring < 0.5 to the left, improve=3.278014, (0 missing)
## Fall < 0.5 to the right, improve=2.241058, (0 missing)
## between_90_to_135 < 0.5 to the left, improve=1.819467, (0 missing)
## Log_production_budget_adj < 16.30641 to the left, improve=1.525703, (0 missing)
## Surrogate splits:
## Log_production_budget_adj < 16.38258 to the left, agree=0.773, adj=0.086, (0 split)
##
## Node number 5: 80 observations
## predicted class=Good expected loss=0.4375 P(node) =0.09661836
## class counts: 24 45 11
## probabilities: 0.300 0.562 0.138
##
## Node number 6: 289 observations, complexity param=0.008714597
## predicted class=Good expected loss=0.4775087 P(node) =0.3490338
## class counts: 59 151 79
## probabilities: 0.204 0.522 0.273
## left son=12 (255 obs) right son=13 (34 obs)
## Primary splits:
## genre_count < 1.5 to the right, improve=5.000923, (0 missing)
## Main_Horror < 0.5 to the left, improve=3.495508, (0 missing)
## Summer < 0.5 to the left, improve=1.745486, (0 missing)
## Main_Crime < 0.5 to the right, improve=1.657563, (0 missing)
## Log_production_budget_adj < 18.80864 to the right, improve=1.486978, (0 missing)
##
## Node number 7: 318 observations, complexity param=0.02178649
## predicted class=Poor expected loss=0.5660377 P(node) =0.384058
## class counts: 45 135 138
## probabilities: 0.142 0.425 0.434
## left son=14 (74 obs) right son=15 (244 obs)
## Primary splits:
## Fall < 0.5 to the right, improve=2.927095, (0 missing)
## Main_Fantasy < 0.5 to the left, improve=2.488508, (0 missing)
## Log_production_budget_adj < 18.96643 to the right, improve=2.470616, (0 missing)
## Main_Animation < 0.5 to the right, improve=2.173962, (0 missing)
## between_90_to_135 < 0.5 to the right, improve=1.881858, (0 missing)
##
## Node number 8: 35 observations
## predicted class=Excellent expected loss=0.2571429 P(node) =0.04227053
## class counts: 26 5 4
## probabilities: 0.743 0.143 0.114
##
## Node number 9: 106 observations, complexity param=0.005446623
## predicted class=Excellent expected loss=0.5754717 P(node) =0.1280193
## class counts: 45 33 28
## probabilities: 0.425 0.311 0.264
## left son=18 (88 obs) right son=19 (18 obs)
## Primary splits:
## Spring < 0.5 to the left, improve=2.117829, (0 missing)
## Log_production_budget_adj < 17.01084 to the left, improve=1.917719, (0 missing)
## between_90_to_135 < 0.5 to the left, improve=1.539968, (0 missing)
## Fall < 0.5 to the right, improve=1.296415, (0 missing)
## PG.13 < 0.5 to the left, improve=1.177982, (0 missing)
##
## Node number 12: 255 observations, complexity param=0.005446623
## predicted class=Good expected loss=0.4666667 P(node) =0.307971
## class counts: 59 136 60
## probabilities: 0.231 0.533 0.235
## left son=24 (227 obs) right son=25 (28 obs)
## Primary splits:
## Main_Horror < 0.5 to the left, improve=2.888411, (0 missing)
## Summer < 0.5 to the left, improve=1.369162, (0 missing)
## Main_Mystery < 0.5 to the right, improve=1.327521, (0 missing)
## Log_production_budget_adj < 16.38519 to the left, improve=1.134223, (0 missing)
## Greater_than_135 < 0.5 to the right, improve=0.990274, (0 missing)
##
## Node number 13: 34 observations
## predicted class=Poor expected loss=0.4411765 P(node) =0.0410628
## class counts: 0 15 19
## probabilities: 0.000 0.441 0.559
##
## Node number 14: 74 observations
## predicted class=Good expected loss=0.472973 P(node) =0.08937198
## class counts: 13 39 22
## probabilities: 0.176 0.527 0.297
##
## Node number 15: 244 observations, complexity param=0.02178649
## predicted class=Poor expected loss=0.5245902 P(node) =0.294686
## class counts: 32 96 116
## probabilities: 0.131 0.393 0.475
## left son=30 (38 obs) right son=31 (206 obs)
## Primary splits:
## Log_production_budget_adj < 18.63072 to the right, improve=3.104970, (0 missing)
## between_90_to_135 < 0.5 to the right, improve=1.931922, (0 missing)
## Summer < 0.5 to the right, improve=1.664575, (0 missing)
## genre_count < 5.5 to the left, improve=1.358275, (0 missing)
## Main_Crime < 0.5 to the right, improve=1.318719, (0 missing)
##
## Node number 18: 88 observations, complexity param=0.005446623
## predicted class=Excellent expected loss=0.5340909 P(node) =0.1062802
## class counts: 41 28 19
## probabilities: 0.466 0.318 0.216
## left son=36 (36 obs) right son=37 (52 obs)
## Primary splits:
## Log_production_budget_adj < 17.01084 to the left, improve=1.993201, (0 missing)
## PG.13 < 0.5 to the left, improve=1.461364, (0 missing)
## Greater_than_135 < 0.5 to the right, improve=1.288961, (0 missing)
## between_90_to_135 < 0.5 to the left, improve=1.288961, (0 missing)
## R < 0.5 to the right, improve=1.039852, (0 missing)
## Surrogate splits:
## Summer < 0.5 to the right, agree=0.636, adj=0.111, (0 split)
## PG < 0.5 to the right, agree=0.602, adj=0.028, (0 split)
##
## Node number 19: 18 observations
## predicted class=Poor expected loss=0.5 P(node) =0.02173913
## class counts: 4 5 9
## probabilities: 0.222 0.278 0.500
##
## Node number 24: 227 observations, complexity param=0.005083515
## predicted class=Good expected loss=0.4625551 P(node) =0.2741546
## class counts: 58 122 47
## probabilities: 0.256 0.537 0.207
## left son=48 (166 obs) right son=49 (61 obs)
## Primary splits:
## Summer < 0.5 to the left, improve=1.397154, (0 missing)
## Main_Mystery < 0.5 to the left, improve=1.311551, (0 missing)
## Log_production_budget_adj < 17.81216 to the left, improve=1.250035, (0 missing)
## Main_Action < 0.5 to the left, improve=1.179562, (0 missing)
## Greater_than_135 < 0.5 to the right, improve=0.898311, (0 missing)
## Surrogate splits:
## Main_Science_Fiction < 0.5 to the left, agree=0.736, adj=0.016, (0 split)
##
## Node number 25: 28 observations, complexity param=0.005446623
## predicted class=Good expected loss=0.5 P(node) =0.03381643
## class counts: 1 14 13
## probabilities: 0.036 0.500 0.464
## left son=50 (19 obs) right son=51 (9 obs)
## Primary splits:
## between_90_to_135 < 0.5 to the right, improve=2.3437760, (0 missing)
## genre_count < 2.5 to the right, improve=1.5952380, (0 missing)
## Log_production_budget_adj < 16.80043 to the right, improve=1.2285710, (0 missing)
## Fall < 0.5 to the left, improve=0.8964859, (0 missing)
## Spring < 0.5 to the right, improve=0.6127820, (0 missing)
## Surrogate splits:
## Log_production_budget_adj < 13.88181 to the right, agree=0.75, adj=0.222, (0 split)
##
## Node number 30: 38 observations
## predicted class=Good expected loss=0.4210526 P(node) =0.04589372
## class counts: 5 22 11
## probabilities: 0.132 0.579 0.289
##
## Node number 31: 206 observations, complexity param=0.006535948
## predicted class=Poor expected loss=0.4902913 P(node) =0.2487923
## class counts: 27 74 105
## probabilities: 0.131 0.359 0.510
## left son=62 (5 obs) right son=63 (201 obs)
## Primary splits:
## Main_Crime < 0.5 to the right, improve=1.575144, (0 missing)
## between_90_to_135 < 0.5 to the right, improve=1.549419, (0 missing)
## Log_production_budget_adj < 16.57526 to the right, improve=1.318611, (0 missing)
## genre_count < 5.5 to the left, improve=1.176957, (0 missing)
## Main_Fantasy < 0.5 to the left, improve=1.117432, (0 missing)
##
## Node number 36: 36 observations
## predicted class=Excellent expected loss=0.3888889 P(node) =0.04347826
## class counts: 22 8 6
## probabilities: 0.611 0.222 0.167
##
## Node number 37: 52 observations, complexity param=0.005446623
## predicted class=Good expected loss=0.6153846 P(node) =0.06280193
## class counts: 19 20 13
## probabilities: 0.365 0.385 0.250
## left son=74 (11 obs) right son=75 (41 obs)
## Primary splits:
## between_90_to_135 < 0.5 to the left, improve=2.7539660, (0 missing)
## Greater_than_135 < 0.5 to the right, improve=2.7539660, (0 missing)
## Log_production_budget_adj < 17.1904 to the right, improve=2.3944540, (0 missing)
## R < 0.5 to the right, improve=0.5796703, (0 missing)
## PG.13 < 0.5 to the left, improve=0.5000000, (0 missing)
## Surrogate splits:
## Greater_than_135 < 0.5 to the right, agree=1.000, adj=1.000, (0 split)
## Summer < 0.5 to the right, agree=0.808, adj=0.091, (0 split)
##
## Node number 48: 166 observations, complexity param=0.005083515
## predicted class=Good expected loss=0.5 P(node) =0.2004831
## class counts: 47 83 36
## probabilities: 0.283 0.500 0.217
## left son=96 (119 obs) right son=97 (47 obs)
## Primary splits:
## Log_production_budget_adj < 17.45673 to the left, improve=2.0249130, (0 missing)
## Main_Mystery < 0.5 to the left, improve=1.5460360, (0 missing)
## Greater_than_135 < 0.5 to the right, improve=1.2166270, (0 missing)
## Main_Fantasy < 0.5 to the left, improve=0.9479144, (0 missing)
## between_90_to_135 < 0.5 to the left, improve=0.8451745, (0 missing)
## Surrogate splits:
## Greater_than_135 < 0.5 to the left, agree=0.759, adj=0.149, (0 split)
## genre_count < 4.5 to the left, agree=0.735, adj=0.064, (0 split)
## Main_Fantasy < 0.5 to the left, agree=0.735, adj=0.064, (0 split)
## Main_Science_Fiction < 0.5 to the left, agree=0.723, adj=0.021, (0 split)
##
## Node number 49: 61 observations
## predicted class=Good expected loss=0.3606557 P(node) =0.0736715
## class counts: 11 39 11
## probabilities: 0.180 0.639 0.180
##
## Node number 50: 19 observations
## predicted class=Good expected loss=0.3684211 P(node) =0.02294686
## class counts: 1 12 6
## probabilities: 0.053 0.632 0.316
##
## Node number 51: 9 observations
## predicted class=Poor expected loss=0.2222222 P(node) =0.01086957
## class counts: 0 2 7
## probabilities: 0.000 0.222 0.778
##
## Node number 62: 5 observations
## predicted class=Good expected loss=0.2 P(node) =0.006038647
## class counts: 0 4 1
## probabilities: 0.000 0.800 0.200
##
## Node number 63: 201 observations
## predicted class=Poor expected loss=0.4825871 P(node) =0.2427536
## class counts: 27 70 104
## probabilities: 0.134 0.348 0.517
##
## Node number 74: 11 observations
## predicted class=Excellent expected loss=0.2727273 P(node) =0.01328502
## class counts: 8 2 1
## probabilities: 0.727 0.182 0.091
##
## Node number 75: 41 observations
## predicted class=Good expected loss=0.5609756 P(node) =0.04951691
## class counts: 11 18 12
## probabilities: 0.268 0.439 0.293
##
## Node number 96: 119 observations, complexity param=0.005083515
## predicted class=Good expected loss=0.4453782 P(node) =0.1437198
## class counts: 32 66 21
## probabilities: 0.269 0.555 0.176
## left son=192 (51 obs) right son=193 (68 obs)
## Primary splits:
## Log_production_budget_adj < 16.38258 to the left, improve=2.3487390, (0 missing)
## Main_Mystery < 0.5 to the left, improve=0.9288612, (0 missing)
## genre_count < 2.5 to the left, improve=0.7656663, (0 missing)
## Main_Comedy < 0.5 to the right, improve=0.7382564, (0 missing)
## Fall < 0.5 to the right, improve=0.6309724, (0 missing)
## Surrogate splits:
## between_90_to_135 < 0.5 to the left, agree=0.639, adj=0.157, (0 split)
## Main_Romance < 0.5 to the right, agree=0.605, adj=0.078, (0 split)
## Main_Comedy < 0.5 to the right, agree=0.597, adj=0.059, (0 split)
##
## Node number 97: 47 observations, complexity param=0.005083515
## predicted class=Good expected loss=0.6382979 P(node) =0.05676329
## class counts: 15 17 15
## probabilities: 0.319 0.362 0.319
## left son=194 (9 obs) right son=195 (38 obs)
## Primary splits:
## Greater_than_135 < 0.5 to the right, improve=2.042678, (0 missing)
## Main_Comedy < 0.5 to the left, improve=2.009929, (0 missing)
## Log_production_budget_adj < 17.65504 to the left, improve=1.368488, (0 missing)
## between_90_to_135 < 0.5 to the left, improve=1.120030, (0 missing)
## Main_Crime < 0.5 to the right, improve=1.044163, (0 missing)
## Surrogate splits:
## between_90_to_135 < 0.5 to the left, agree=0.957, adj=0.778, (0 split)
## Log_production_budget_adj < 18.42106 to the right, agree=0.851, adj=0.222, (0 split)
## Main_Thriller < 0.5 to the right, agree=0.851, adj=0.222, (0 split)
##
## Node number 192: 51 observations, complexity param=0.005083515
## predicted class=Good expected loss=0.5686275 P(node) =0.0615942
## class counts: 19 22 10
## probabilities: 0.373 0.431 0.196
## left son=384 (14 obs) right son=385 (37 obs)
## Primary splits:
## Log_production_budget_adj < 16.01368 to the right, improve=2.5246420, (0 missing)
## between_90_to_135 < 0.5 to the right, improve=1.1500750, (0 missing)
## Main_Crime < 0.5 to the right, improve=0.5836317, (0 missing)
## genre_count < 3.5 to the right, improve=0.4938440, (0 missing)
## Fall < 0.5 to the right, improve=0.4172967, (0 missing)
##
## Node number 193: 68 observations
## predicted class=Good expected loss=0.3529412 P(node) =0.0821256
## class counts: 13 44 11
## probabilities: 0.191 0.647 0.162
##
## Node number 194: 9 observations
## predicted class=Excellent expected loss=0.3333333 P(node) =0.01086957
## class counts: 6 2 1
## probabilities: 0.667 0.222 0.111
##
## Node number 195: 38 observations, complexity param=0.005083515
## predicted class=Good expected loss=0.6052632 P(node) =0.04589372
## class counts: 9 15 14
## probabilities: 0.237 0.395 0.368
## left son=390 (10 obs) right son=391 (28 obs)
## Primary splits:
## Log_production_budget_adj < 17.65504 to the left, improve=2.1323310, (0 missing)
## genre_count < 4.5 to the right, improve=1.1718270, (0 missing)
## Main_Comedy < 0.5 to the left, improve=0.9953560, (0 missing)
## Main_Crime < 0.5 to the right, improve=0.9061404, (0 missing)
## Main_Fantasy < 0.5 to the left, improve=0.4847118, (0 missing)
##
## Node number 384: 14 observations
## predicted class=Excellent expected loss=0.4285714 P(node) =0.01690821
## class counts: 8 2 4
## probabilities: 0.571 0.143 0.286
##
## Node number 385: 37 observations
## predicted class=Good expected loss=0.4594595 P(node) =0.04468599
## class counts: 11 20 6
## probabilities: 0.297 0.541 0.162
##
## Node number 390: 10 observations
## predicted class=Excellent expected loss=0.5 P(node) =0.01207729
## class counts: 5 1 4
## probabilities: 0.500 0.100 0.400
##
## Node number 391: 28 observations
## predicted class=Good expected loss=0.5 P(node) =0.03381643
## class counts: 4 14 10
## probabilities: 0.143 0.500 0.357
Top Important Features:
Log_production_budget_adj (27% importance). Main_Drama (19% importance). genre_count (11% importance). Other significant features include between_90_to_135, R, and Greater_than_135.
Primary Split:
The root node splits on Main_Drama, indicating its high influence in categorizing IMDb ratings.
Tree Insights:
The tree progresses with logical splits like budget size (Log_production_budget_adj) and other categorical predictors such as seasonality (Spring, Fall) and genres (Main_Horror, Main_Crime). Leaf nodes provide predictions with class probabilities, helping interpret how combinations of features lead to specific IMDb categories.
Observations from the Tree Diagram:
The tree visually reveals how combinations of predictors classify movies into Excellent, Good, or Poor.
Evaluating Decision tree
# Predict class probabilities on test data
predicted_probabilities <- predict(dt_model_categorical, newdata = test_data, type = "prob")
# Predict classes on test data
predicted_classes <- predict(dt_model_categorical, newdata = test_data, type = "class")
# Ensure true labels are factors
true_classes <- factor(test_data$IMDB_Category, levels = levels(train_data$IMDB_Category))
# Convert predicted classes to factors
predicted_classes <- factor(predicted_classes, levels = levels(train_data$IMDB_Category))
library(caret)
# Compute confusion matrix
confusion_matrix <- confusionMatrix(predicted_classes, true_classes)
# Print the confusion matrix
print(confusion_matrix)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Excellent Good Poor
## Excellent 19 29 8
## Good 41 85 53
## Poor 14 49 58
##
## Overall Statistics
##
## Accuracy : 0.4551
## 95% CI : (0.4025, 0.5084)
## No Information Rate : 0.4579
## P-Value [Acc > NIR] : 0.5628
##
## Kappa : 0.1259
##
## Mcnemar's Test P-Value : 0.2781
##
## Statistics by Class:
##
## Class: Excellent Class: Good Class: Poor
## Sensitivity 0.25676 0.5215 0.4874
## Specificity 0.86879 0.5130 0.7342
## Pos Pred Value 0.33929 0.4749 0.4793
## Neg Pred Value 0.81667 0.5593 0.7404
## Prevalence 0.20787 0.4579 0.3343
## Detection Rate 0.05337 0.2388 0.1629
## Detection Prevalence 0.15730 0.5028 0.3399
## Balanced Accuracy 0.56278 0.5172 0.6108
library(pROC)
# Initialize lists for ROC and AUC
roc_list <- list()
auc_list <- list()
# Compute ROC and AUC for each class
for (category in levels(true_classes)) {
# Create binary labels: 1 for the current class, 0 for all others
true_binary <- ifelse(true_classes == category, 1, 0)
# Get predicted probabilities for the current class
predicted_probs_binary <- predicted_probabilities[, category]
# Skip if true_binary has fewer than two levels
if (length(unique(true_binary)) < 2) {
cat("Skipping ROC for", category, "due to insufficient data.\n")
next
}
# Compute ROC
roc_obj <- roc(true_binary, predicted_probs_binary)
roc_list[[category]] <- roc_obj
auc_list[[category]] <- auc(roc_obj)
# Plot ROC curve
plot(roc_obj, main = paste("ROC Curve for", category), col = "blue")
abline(a = 0, b = 1, col = "gray", lty = 2) # Reference line
cat("AUC for", category, ":", auc_list[[category]], "\n")
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Excellent : 0.7017203
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Good : 0.5223148
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Poor : 0.6566855
library(dplyr)
# Initialize a list to store Lift Tables for each class
lift_tables <- list()
# Loop through each class
for (category in levels(true_classes)) {
# Combine test data with predicted probabilities
test_data_category <- test_data %>%
mutate(predicted_prob = predicted_probabilities[, category])
# Sort by predicted probabilities for the current class
test_data_category <- test_data_category %>%
arrange(desc(predicted_prob))
# Add deciles
test_data_category$decile <- ntile(test_data_category$predicted_prob, 10)
# Calculate Lift Table
lift_table <- test_data_category %>%
group_by(decile) %>%
summarize(
total = n(),
events = sum(IMDB_Category == category),
cumulative_events = cumsum(events),
cumulative_percentage = cumulative_events / sum(IMDB_Category == category)
)
# Store the Lift Table
lift_tables[[category]] <- lift_table
# Plot Lift Chart for the current category
plot(
lift_table$decile, lift_table$cumulative_percentage,
type = "o", col = "blue", xlab = "Decile", ylab = "Cumulative Gain",
main = paste("Lift Chart for", category)
)
abline(a = 0, b = 0.1, col = "gray", lty = 2) # Reference line
}
Accuracy: The model achieves an overall accuracy of 45.51%, meaning it correctly classifies 45.51% of the samples. Kappa (0.1259): This metric considers the possibility of correct predictions by chance. A low kappa indicates the model’s performance is only slightly better than random guessing.
McNemar’s Test: The p-value (0.2781) suggests that the errors are not significantly imbalanced between the categories.
Per-Class Statistics:
Class: Excellent:
Sensitivity (25.68%): The model identifies only 25.68% of the true Excellent samples correctly. Specificity (86.88%): It performs well at avoiding false positives for Excellent. Balanced Accuracy (56.28%): An average of sensitivity and specificity, showing moderate performance for this class.
Class: Good:
Sensitivity (52.15%): The model identifies about half of the Good samples correctly. Specificity (51.30%): Low ability to avoid false positives for Good. Balanced Accuracy (51.72%): Slightly better than random performance.
Class: Poor:
Sensitivity (48.74%): The model identifies 48.74% of true Poor samples. Specificity (73.42%): Relatively good at avoiding false positives for Poor. Balanced Accuracy (61.08%): Decent performance for this category.
AUC (Area Under the Curve):
Excellent: AUC of 0.7017 indicates moderately good discrimination. Good: AUC of 0.5223 suggests the model struggles to differentiate Good from the other classes. Poor: AUC of 0.6567 indicates better discrimination for Poor compared to Good.
For optimizing film investment, the best model is Random Forest.
Why?
Robust Performance: Random Forest provides consistent and reliable accuracy, ensuring a balanced prediction across all IMDB categories. This minimizes risks in film investment by avoiding overfitting to specific patterns.
Feature Importance: The model naturally ranks features like Log_production_budget_adj, Main_Drama, and genre_count, which are crucial for understanding and predicting the profitability of films. This helps focus on key factors that drive returns.
Handling Complexity: Random Forest effectively handles nonlinear relationships and interactions between features like budget, genre, and seasonal trends, providing insights into complex dependencies that impact film success.
Interpretability: Unlike other models, Random Forest offers clear insights through variable importance plots and easy-to-understand predictions, enabling better decision-making for investments.
Robust to Noise: It is highly robust to outliers and noise in the data, ensuring stable predictions even with imperfect historical data, making it ideal for predicting returns in uncertain film markets.
Conclusion: Random Forest aligns with the business goal of maximizing returns by providing accurate, interpretable, and robust predictions for film investment decisions.
Critic_Score
# Categorize Log_worldwide_gross_adj into buckets
data <- data %>%
mutate(Critic_score_category = case_when(
Critic_score <= 38 ~ "Unpopular",
Critic_score > 38 & Critic_score <= 67 ~ "Moderate",
Critic_score > 67 ~ "Popular"
))
# # Convert to factor
data$Critic_score_category <- as.factor(data$Critic_score_category)
# Check if the transformation is correct
table(data$Critic_score_category)
##
## Moderate Popular Unpopular
## 353 429 402
Categorized Critic_score into three distinct categories: Unpopular, Moderate, and Popular, based on the following thresholds:
Unpopular: Critic scores ≤ 38 Moderate: Critic scores between 38 and 67 (exclusive of both ends) Popular: Critic scores > 67 The result is a balanced categorical distribution:
Moderate: 353 entries Popular: 429 entries Unpopular: 402 entries
This categorization transforms the continuous critic score into a factor, which is well-suited for classification models. It will enable us to predict how different factors contribute to the likelihood of a film falling into each category.
Spliting the data
# Load necessary libraries for modeling and evaluation
library(caret)
library(glmnet) # For Ridge and LASSO regression
library(randomForest) # For Random Forest model
library(xgboost) # For Gradient Boosting model
library(Metrics) # For evaluation metrics
set.seed(123) # For reproducibility
# Split the data
train_indices <- sample(1:nrow(data), size = 0.70 * nrow(data))
train_data <- data[train_indices, ]
test_data <- data[-train_indices, ]
Multinomial logistic regression
# Load the required library
library(nnet)
# Fit the multinomial logistic regression model
multinom_model <- multinom(
Critic_score_category ~ Log_production_budget_adj + PG.13 + R + PG + G +
between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count +
Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime +
Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror +
Main_Mystery + Main_Romance + Main_Science_Fiction + Main_Thriller + Main_History,
data = train_data
)
## # weights: 84 (54 variable)
## initial value 909.650975
## iter 10 value 838.825698
## iter 20 value 828.388787
## iter 30 value 826.477567
## iter 40 value 825.952070
## iter 50 value 825.753253
## final value 825.726103
## converged
# View model summary
summary(multinom_model)
## Call:
## multinom(formula = Critic_score_category ~ Log_production_budget_adj +
## PG.13 + R + PG + G + between_90_to_135 + Greater_than_135 +
## Spring + Summer + Fall + genre_count + Main_Action + Main_Adventure +
## Main_Animation + Main_Comedy + Main_Crime + Main_Documentary +
## Main_Drama + Main_Family + Main_Fantasy + Main_Horror + Main_Mystery +
## Main_Romance + Main_Science_Fiction + Main_Thriller + Main_History,
## data = train_data)
##
## Coefficients:
## (Intercept) Log_production_budget_adj PG.13 R
## Popular 15.376932 -0.30357222 -11.367107 -11.230994
## Unpopular -3.981568 0.09646786 4.858507 4.226229
## PG G between_90_to_135 Greater_than_135 Spring
## Popular -11.373409 -9.352309 0.08757648 0.4904924 -0.4731550
## Unpopular 4.847157 -5.849016 -0.81519948 -1.5760898 -0.3449393
## Summer Fall genre_count Main_Action Main_Adventure
## Popular -0.04348603 0.1714663 0.06255986 0.5486513 0.9053951
## Unpopular -0.72742514 -0.3816076 -0.10477532 -0.7044189 -1.2277867
## Main_Animation Main_Comedy Main_Crime Main_Documentary Main_Drama
## Popular 2.006018 1.0511379 0.8353981 0.4662384 1.4415091
## Unpopular -1.815775 -0.7921482 -0.7206513 -18.7251508 -0.7117834
## Main_Family Main_Fantasy Main_Horror Main_Mystery Main_Romance
## Popular 0.04761462 2.472294 0.4807710 0.7496022 1.0096143
## Unpopular -2.79715570 1.190501 -0.4683647 0.1500117 -0.5279382
## Main_Science_Fiction Main_Thriller Main_History
## Popular 1.45091175 0.7238196 1.0116750
## Unpopular -0.08134442 -0.5647925 -0.6949359
##
## Std. Errors:
## (Intercept) Log_production_budget_adj PG.13 R PG
## Popular 1.592421 0.08854112 0.4696713 0.4459946 0.5229701
## Unpopular 1.392592 0.09681059 0.4901424 0.4549889 0.5476292
## G between_90_to_135 Greater_than_135 Spring Summer
## Popular 1.087108e+00 0.3452727 0.5146064 0.2814829 0.2706812
## Unpopular 7.893398e-05 0.3280348 0.5631904 0.2602138 0.2739543
## Fall genre_count Main_Action Main_Adventure Main_Animation
## Popular 0.2627186 0.1005230 1.25815 1.3116615 1.390671
## Unpopular 0.2646064 0.1023577 0.90605 0.9815192 1.188054
## Main_Comedy Main_Crime Main_Documentary Main_Drama Main_Family
## Popular 1.254572 1.2871189 1.522110e+00 1.2508904 1.530363
## Unpopular 0.912116 0.9663221 1.268012e-06 0.9095735 1.448535
## Main_Fantasy Main_Horror Main_Mystery Main_Romance
## Popular 1.674269 1.294508 1.594372 1.356640
## Unpopular 1.380665 0.953787 1.231029 1.030036
## Main_Science_Fiction Main_Thriller Main_History
## Popular 1.430128 1.284191 1.556984
## Unpopular 1.139918 0.946436 1.358771
##
## Residual Deviance: 1651.452
## AIC: 1759.452
Coefficients Interpretation:
Each coefficient indicates the impact of the respective predictor on the log-odds of a category compared to the reference category. For example, in the “Popular” category, Log_production_budget_adj has a coefficient of -0.3035. This means that a one-unit increase in the log of the production budget decreases the log-odds of being “Popular” compared to the reference category.
Category-Specific Coefficients:
“Popular” Category:
High positive coefficients for features like Main_Fantasy, Main_Drama, and Main_Comedy indicate these genres significantly increase the likelihood of a film being categorized as “Popular.”
“Unpopular” Category: Negative coefficients for variables like Main_Action and PG.13 suggest these attributes decrease the likelihood of a film being categorized as “Unpopular.”
Variable Significance:
Variables with larger coefficients and smaller standard errors have a more significant impact on the prediction.
Model Fit:
Residual Deviance: 1651.45, which measures the goodness of fit. Lower values indicate a better fit. AIC (Akaike Information Criterion): 1759.45, used for model comparison. Lower values are better, indicating a more parsimonious model.
Practical Insights for Film Investment:
Films with higher budgets (Log_production_budget_adj) and popular genres like Fantasy, Drama, and Comedy are more likely to achieve a “Popular” critic score. Conversely, targeting features like Action or PG-rated films might reduce the risk of being categorized as “Unpopular.”
Evaluating Multinomial logistic regression
library(pROC)
# Predict on the test data
test_data$predicted_categories <- predict(multinom_model, newdata = test_data, type = "class")
# Confusion Matrix
confusion_matrix_test <- confusionMatrix(
data = factor(test_data$predicted_categories, levels = levels(test_data$Critic_score_category)),
reference = factor(test_data$Critic_score_category)
)
print(confusion_matrix_test)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Moderate Popular Unpopular
## Moderate 19 10 13
## Popular 37 85 41
## Unpopular 55 25 71
##
## Overall Statistics
##
## Accuracy : 0.4916
## 95% CI : (0.4385, 0.5448)
## No Information Rate : 0.3511
## P-Value [Acc > NIR] : 3.746e-08
##
## Kappa : 0.2296
##
## Mcnemar's Test P-Value : 7.871e-10
##
## Statistics by Class:
##
## Class: Moderate Class: Popular Class: Unpopular
## Sensitivity 0.17117 0.7083 0.5680
## Specificity 0.90612 0.6695 0.6537
## Pos Pred Value 0.45238 0.5215 0.4702
## Neg Pred Value 0.70701 0.8187 0.7366
## Prevalence 0.31180 0.3371 0.3511
## Detection Rate 0.05337 0.2388 0.1994
## Detection Prevalence 0.11798 0.4579 0.4242
## Balanced Accuracy 0.53865 0.6889 0.6108
# ROC Curve and AUC for each class
roc_list_test <- list()
auc_list_test <- list()
categories <- levels(test_data$Critic_score_category)
for (category in categories) {
# Create binary response for "One-vs-All"
true_binary <- ifelse(test_data$Critic_score_category == category, 1, 0)
predicted_probs <- predict(multinom_model, newdata = test_data, type = "probs")[, category]
# ROC Curve
roc_obj_test <- roc(true_binary, predicted_probs)
roc_list_test[[category]] <- roc_obj_test
auc_list_test[[category]] <- auc(roc_obj_test)
# Plot ROC Curve for this class
plot(roc_obj_test, main = paste("ROC Curve for", category, "on Test Data"), col = "blue")
abline(a = 0, b = 1, col = "gray", lty = 2) # Reference line
cat("AUC for", category, "on Test Data:", auc_list_test[[category]], "\n")
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Moderate on Test Data: 0.5842618
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Popular on Test Data: 0.7382415
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Unpopular on Test Data: 0.6526061
# Lift Chart for each category
for (category in categories) {
# Sort data by predicted probabilities for the chosen category
test_data <- test_data %>%
arrange(desc(predict(multinom_model, newdata = test_data, type = "probs")[, category]))
# Add deciles for the chosen category
test_data$decile <- ntile(predict(multinom_model, newdata = test_data, type = "probs")[, category], 10)
# Calculate Lift
lift_table_test <- test_data %>%
group_by(decile) %>%
summarize(
total = n(),
events = sum(Critic_score_category == category),
cumulative_events = cumsum(events),
cumulative_percentage = cumulative_events / sum(Critic_score_category == category)
)
# Plot Lift Chart for the current category
plot(
lift_table_test$decile, lift_table_test$cumulative_percentage,
type = "o", col = "blue", xlab = "Decile", ylab = "Cumulative Gain",
main = paste("Lift Chart for", category, "on Test Data")
)
abline(0, 0.1, col = "gray", lty = 2) # Reference random line
}
Confusion Matrix Summary:
Overall Accuracy:
The accuracy of the model is 49.16%, which is slightly better than random guessing but leaves room for improvement. 95% CI: The accuracy falls between 43.85% and 54.48%.
Class-Wise Performance:
Moderate: Sensitivity (True Positive Rate): 17.12%, indicating difficulty in identifying “Moderate” films. Specificity: 90.61%, meaning it rarely misclassifies other categories as “Moderate.”
Popular: Sensitivity: 70.83%, showing strong capability in identifying “Popular” films. Specificity: 66.95%, meaning some films from other categories are misclassified as “Popular.”
Unpopular: Sensitivity: 56.80%, moderately good at identifying “Unpopular” films. Specificity: 65.37%, indicating some overlap in classification.
Kappa Statistic:
Kappa = 0.2296, indicating weak but meaningful agreement between predicted and actual labels.
McNemar’s Test:
The p-value (7.871e-10) indicates significant differences in the error rates, suggesting potential for improvement.
ROC and AUC Analysis:
Moderate: AUC: 0.584, showing weak discriminative ability for identifying “Moderate” films.
Popular: AUC: 0.738, the strongest performance among all categories, indicating good separation of “Popular” films from the others.
Unpopular: AUC: 0.653, indicating moderate performance for distinguishing “Unpopular” films.
Random Forest
# Load the required library
library(randomForest)
# Check for missing values in the target and predictor variables
sum(is.na(train_data$Critic_score_category)) # Check for missing values in the target
## [1] 0
sum(is.na(train_data$Log_production_budget_adj)) # Check in predictor variables
## [1] 0
# Remove rows with missing values in any column
train_data <- na.omit(train_data)
# Train Random Forest Model for Classification
rf_model <- randomForest(
Critic_score_category ~ Log_production_budget_adj + PG.13 + R + PG + G +
between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count +
Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime +
Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror +
Main_Mystery + Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,
data = train_data,
ntree = 500, # Number of trees
mtry = 5, # Number of predictors randomly selected at each split
importance = TRUE, # Calculate variable importance
proximity = TRUE # Enable proximity matrix for better insights
)
# View the model summary
print(rf_model)
##
## Call:
## randomForest(formula = Critic_score_category ~ Log_production_budget_adj + PG.13 + R + PG + G + between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count + Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime + Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror + Main_Mystery + Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller, data = train_data, ntree = 500, mtry = 5, importance = TRUE, proximity = TRUE)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 5
##
## OOB estimate of error rate: 57.25%
## Confusion matrix:
## Moderate Popular Unpopular class.error
## Moderate 56 101 85 0.7685950
## Popular 69 176 64 0.4304207
## Unpopular 65 90 122 0.5595668
# Feature Importance
importance(rf_model)
## Moderate Popular Unpopular
## Log_production_budget_adj -5.7216051 19.191744646 1.04296015
## PG.13 -2.4280856 5.483846357 5.43853037
## R -0.1554883 6.342049162 3.71559750
## PG 4.3793232 6.059994439 -0.86511967
## G 0.3358989 8.898097523 4.17515058
## between_90_to_135 -3.0344863 0.650684465 5.51323410
## Greater_than_135 -1.1905628 5.135357005 -2.09562138
## Spring -0.2587980 7.054468992 -6.81666118
## Summer -3.8201685 5.163605430 -2.73191887
## Fall 0.8113437 6.323699763 -0.58809841
## genre_count -2.5320324 1.592881645 5.79971892
## Main_Action 3.8509152 11.280229112 -5.55986056
## Main_Adventure 1.5535627 -0.005996815 -5.63286077
## Main_Animation -1.0537341 5.088593976 -5.66106716
## Main_Comedy -5.0483588 5.030949874 2.42392026
## Main_Crime 0.5242691 -0.207408148 3.99398809
## Main_Documentary -2.9378110 -1.159651474 4.43153201
## Main_Drama -1.4850638 19.360889563 -1.19448099
## Main_Family 8.2925192 -0.014756823 0.59981493
## Main_Fantasy 0.4471958 -1.547376773 1.51037568
## Main_Horror -2.2563048 -0.344036815 0.59036403
## Main_Mystery -4.3008429 -1.128812528 0.99827230
## Main_History -4.6698961 -0.793959306 -2.77260201
## Main_Romance -4.7237143 -3.585018183 -0.04714954
## Main_Science_Fiction -3.2486970 -1.826148733 0.32346580
## Main_Thriller 0.3760518 2.229674132 1.40716837
## MeanDecreaseAccuracy MeanDecreaseGini
## Log_production_budget_adj 10.5565879 84.000810
## PG.13 6.5230852 8.884131
## R 7.7779231 8.574279
## PG 6.7709808 5.837082
## G 8.3514943 1.868033
## between_90_to_135 2.6750718 11.277900
## Greater_than_135 2.0715106 5.155201
## Spring 0.3991179 9.269284
## Summer -0.4552907 9.877839
## Fall 4.9532904 10.540366
## genre_count 3.3383661 30.148347
## Main_Action 5.5471946 8.349551
## Main_Adventure -2.7299650 4.293407
## Main_Animation 0.9181277 2.667557
## Main_Comedy 2.0898983 8.707040
## Main_Crime 2.4942552 5.638045
## Main_Documentary -0.8891891 1.530377
## Main_Drama 13.8847560 11.097511
## Main_Family 6.5400679 2.116554
## Main_Fantasy 0.5363267 2.907561
## Main_Horror -1.0042383 6.410964
## Main_Mystery -2.2299752 2.290019
## Main_History -4.6556230 1.552223
## Main_Romance -4.7681010 3.518845
## Main_Science_Fiction -2.3797897 3.338094
## Main_Thriller 2.4180128 6.305447
varImpPlot(rf_model) # Plot variable importance
Overall Performance:
Out-of-Bag (OOB) Error Rate: 57.25%. This means that approximately 57% of predictions on unseen training data are incorrect, indicating room for improvement or possible overfitting issues.
Class-specific Errors:
Moderate: 76.86% class error—this category is poorly predicted. Popular: 43.04% class error—the model performs relatively well here. Unpopular: 55.96% class error—moderate performance for this category.
Variable Importance:
The MeanDecreaseAccuracy and MeanDecreaseGini metrics indicate which variables are most influential:
Top Features by Importance:
Log_production_budget_adj: Strongest predictor across all categories. genre_count: Plays a significant role in prediction accuracy. Main_Drama: A key genre predictor for categorization. Fall and between_90_to_135 (runtime range): These provide important seasonal and runtime-related context.
Least Important Features:
Genres like Main_History, Main_Documentary, and Main_Romance have minimal impact on predictions.
Insights from Variable Importance Plot:
The importance plot shows that the budget and genres are crucial in determining whether a film is categorized as “Popular,” “Moderate,” or “Unpopular.” Seasonal variables (e.g., Fall, Summer) also contribute to some extent.
Key Observations:
The model struggles to accurately classify the “Moderate” category, likely due to overlaps in features across categories or insufficient training data specific to this group. Popular films are predicted with the highest accuracy, aligning with business goals that focus on recognizing high-performing films.
Evaluating Random Forest
# Predict class probabilities on test data
predicted_probabilities <- predict(rf_model, newdata = test_data, type = "prob")
# Predict classes on test data
predicted_classes <- predict(rf_model, newdata = test_data, type = "class")
# Ensure true labels are factors with the correct levels
true_classes <- factor(test_data$Critic_score_category, levels = levels(train_data$Critic_score_category))
# Convert predicted classes to factors with the same levels
predicted_classes <- factor(predicted_classes, levels = levels(train_data$Critic_score_category))
library(caret)
# Compute the confusion matrix
confusion_matrix <- confusionMatrix(predicted_classes, true_classes)
# Print the confusion matrix
print(confusion_matrix)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Moderate Popular Unpopular
## Moderate 24 21 20
## Popular 45 78 40
## Unpopular 42 21 65
##
## Overall Statistics
##
## Accuracy : 0.4691
## 95% CI : (0.4163, 0.5224)
## No Information Rate : 0.3511
## P-Value [Acc > NIR] : 3.058e-06
##
## Kappa : 0.1986
##
## Mcnemar's Test P-Value : 5.253e-05
##
## Statistics by Class:
##
## Class: Moderate Class: Popular Class: Unpopular
## Sensitivity 0.21622 0.6500 0.5200
## Specificity 0.83265 0.6398 0.7273
## Pos Pred Value 0.36923 0.4785 0.5078
## Neg Pred Value 0.70103 0.7824 0.7368
## Prevalence 0.31180 0.3371 0.3511
## Detection Rate 0.06742 0.2191 0.1826
## Detection Prevalence 0.18258 0.4579 0.3596
## Balanced Accuracy 0.52443 0.6449 0.6236
# Extract key metrics
accuracy <- confusion_matrix$overall["Accuracy"]
sensitivity <- confusion_matrix$byClass[, "Sensitivity"]
specificity <- confusion_matrix$byClass[, "Specificity"]
# Print metrics
cat("Accuracy:", accuracy, "\n")
## Accuracy: 0.4691011
cat("Sensitivity for each class:\n", sensitivity, "\n")
## Sensitivity for each class:
## 0.2162162 0.65 0.52
cat("Specificity for each class:\n", specificity, "\n")
## Specificity for each class:
## 0.8326531 0.6398305 0.7272727
library(pROC)
# Initialize lists for ROC and AUC
roc_list <- list()
auc_list <- list()
# Compute ROC and AUC for each class
for (category in levels(true_classes)) {
# Create binary labels: 1 for current class, 0 for all others
true_binary <- ifelse(true_classes == category, 1, 0)
# Get predicted probabilities for the current class
predicted_probs_binary <- predicted_probabilities[, category]
# Skip if binary labels have fewer than two levels
if (length(unique(true_binary)) < 2) {
cat("Skipping ROC for", category, "due to insufficient data.\n")
next
}
# Compute ROC
roc_obj <- roc(true_binary, predicted_probs_binary)
roc_list[[category]] <- roc_obj
auc_list[[category]] <- auc(roc_obj)
# Plot ROC curve
plot(roc_obj, main = paste("ROC Curve for", category), col = "blue")
abline(a = 0, b = 1, col = "gray", lty = 2) # Reference line
cat("AUC for", category, ":", auc_list[[category]], "\n")
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Moderate : 0.5480419
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Popular : 0.7070445
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Unpopular : 0.6922771
library(dplyr)
# Initialize a list to store Lift Tables for each class
lift_tables <- list()
# Loop through each class
for (category in levels(true_classes)) {
# Add predicted probabilities for sorting
test_data_category <- test_data %>%
mutate(predicted_prob = predicted_probabilities[, category])
# Sort by predicted probabilities for the current class
test_data_category <- test_data_category %>%
arrange(desc(predicted_prob))
# Add deciles
test_data_category$decile <- ntile(test_data_category$predicted_prob, 10)
# Calculate Lift Table
lift_table <- test_data_category %>%
group_by(decile) %>%
summarize(
total = n(),
events = sum(Critic_score_category == category),
cumulative_events = cumsum(events),
cumulative_percentage = cumulative_events / sum(Critic_score_category == category)
)
# Store the Lift Table
lift_tables[[category]] <- lift_table
# Plot Lift Chart for the current category
plot(
lift_table$decile, lift_table$cumulative_percentage,
type = "o", col = "blue", xlab = "Decile", ylab = "Cumulative Gain",
main = paste("Lift Chart for", category)
)
abline(a = 0, b = 0.1, col = "gray", lty = 2) # Reference line
}
Overall Accuracy: The model achieved an accuracy of 46.91%, which indicates moderate performance in predicting critic score categories.
Sensitivity: The ability to correctly identify each category varied:
Moderate: 21.62% sensitivity, indicating poor identification of this class. Popular: 65.00% sensitivity, showing the model performs best for this category. Unpopular: 52.00% sensitivity, showing moderate ability to identify this class.
Specificity: The ability to correctly exclude non-members of each class:
Moderate: 83.27%, indicating good performance in ruling out instances that are not Moderate. Popular: 63.98%, indicating moderate performance in excluding non-Popular categories. Unpopular: 72.73%, showing fair ability to exclude non-Unpopular instances.
AUC (Area Under Curve): Measures the model’s overall performance for each class:
Moderate: AUC = 0.548, indicating poor separation of Moderate from other categories. Popular: AUC = 0.707, showing fair separation for this class. Unpopular: AUC = 0.692, reflecting moderate performance for this category.
Conclusion:
The Random Forest model performs best for the “Popular” category, showing relatively high sensitivity and AUC. However, it struggles with the “Moderate” category, suggesting limitations in distinguishing this class effectively. The overall performance suggests the need for improvement, particularly for underperforming categories like “Moderate.”
XGBoost
# Load necessary libraries
library(xgboost)
# Prepare data for XGBoost
x_train <- model.matrix(
Critic_score_category ~ Log_production_budget_adj + PG.13 + R + PG + G +
between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count +
Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime +
Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror +
Main_Mystery + Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,
data = train_data
)[, -1] # Remove intercept column
# Encode the target variable as numeric (0-based for XGBoost)
y_train <- as.numeric(train_data$Critic_score_category) - 1
# Train the XGBoost model for multi-class classification
xgb_model <- xgboost(
data = as.matrix(x_train),
label = y_train,
objective = "multi:softprob", # Multi-class classification
num_class = length(levels(train_data$Critic_score_category)), # Number of classes
nrounds = 100, # Number of boosting rounds
max_depth = 6, # Tree depth
eta = 0.1, # Learning rate
colsample_bytree = 0.8, # Subsample ratio of columns
verbose = 1 # Print training progress
)
## [1] train-mlogloss:1.077933
## [2] train-mlogloss:1.057671
## [3] train-mlogloss:1.041463
## [4] train-mlogloss:1.024305
## [5] train-mlogloss:1.008889
## [6] train-mlogloss:0.994372
## [7] train-mlogloss:0.981503
## [8] train-mlogloss:0.964888
## [9] train-mlogloss:0.950778
## [10] train-mlogloss:0.936668
## [11] train-mlogloss:0.924759
## [12] train-mlogloss:0.914335
## [13] train-mlogloss:0.904313
## [14] train-mlogloss:0.896460
## [15] train-mlogloss:0.888847
## [16] train-mlogloss:0.880893
## [17] train-mlogloss:0.873080
## [18] train-mlogloss:0.864814
## [19] train-mlogloss:0.858445
## [20] train-mlogloss:0.850213
## [21] train-mlogloss:0.843088
## [22] train-mlogloss:0.837330
## [23] train-mlogloss:0.831897
## [24] train-mlogloss:0.825611
## [25] train-mlogloss:0.819434
## [26] train-mlogloss:0.813625
## [27] train-mlogloss:0.808139
## [28] train-mlogloss:0.802633
## [29] train-mlogloss:0.797602
## [30] train-mlogloss:0.791358
## [31] train-mlogloss:0.787822
## [32] train-mlogloss:0.784107
## [33] train-mlogloss:0.778533
## [34] train-mlogloss:0.773868
## [35] train-mlogloss:0.770273
## [36] train-mlogloss:0.766824
## [37] train-mlogloss:0.762580
## [38] train-mlogloss:0.758046
## [39] train-mlogloss:0.754330
## [40] train-mlogloss:0.748136
## [41] train-mlogloss:0.745279
## [42] train-mlogloss:0.740483
## [43] train-mlogloss:0.737294
## [44] train-mlogloss:0.733100
## [45] train-mlogloss:0.729472
## [46] train-mlogloss:0.726729
## [47] train-mlogloss:0.723034
## [48] train-mlogloss:0.720372
## [49] train-mlogloss:0.715739
## [50] train-mlogloss:0.710446
## [51] train-mlogloss:0.707980
## [52] train-mlogloss:0.704872
## [53] train-mlogloss:0.701038
## [54] train-mlogloss:0.697456
## [55] train-mlogloss:0.695029
## [56] train-mlogloss:0.691656
## [57] train-mlogloss:0.688859
## [58] train-mlogloss:0.687201
## [59] train-mlogloss:0.685493
## [60] train-mlogloss:0.682568
## [61] train-mlogloss:0.679697
## [62] train-mlogloss:0.676682
## [63] train-mlogloss:0.675025
## [64] train-mlogloss:0.671411
## [65] train-mlogloss:0.666427
## [66] train-mlogloss:0.663687
## [67] train-mlogloss:0.659878
## [68] train-mlogloss:0.657967
## [69] train-mlogloss:0.653613
## [70] train-mlogloss:0.652012
## [71] train-mlogloss:0.649098
## [72] train-mlogloss:0.646029
## [73] train-mlogloss:0.643313
## [74] train-mlogloss:0.641802
## [75] train-mlogloss:0.639229
## [76] train-mlogloss:0.636658
## [77] train-mlogloss:0.634901
## [78] train-mlogloss:0.633321
## [79] train-mlogloss:0.630792
## [80] train-mlogloss:0.629021
## [81] train-mlogloss:0.625248
## [82] train-mlogloss:0.622885
## [83] train-mlogloss:0.621223
## [84] train-mlogloss:0.618799
## [85] train-mlogloss:0.615751
## [86] train-mlogloss:0.613250
## [87] train-mlogloss:0.610869
## [88] train-mlogloss:0.608188
## [89] train-mlogloss:0.606144
## [90] train-mlogloss:0.603877
## [91] train-mlogloss:0.601677
## [92] train-mlogloss:0.599473
## [93] train-mlogloss:0.597914
## [94] train-mlogloss:0.595470
## [95] train-mlogloss:0.593429
## [96] train-mlogloss:0.591563
## [97] train-mlogloss:0.590139
## [98] train-mlogloss:0.588971
## [99] train-mlogloss:0.586972
## [100] train-mlogloss:0.584293
# Feature importance
importance <- xgb.importance(feature_names = colnames(x_train), model = xgb_model)
print(importance)
## Feature Gain Cover Frequency
## 1: Log_production_budget_adj 0.470892612 0.467577316 0.426777006
## 2: genre_count 0.105903098 0.081240562 0.117334121
## 3: Main_Drama 0.041003417 0.032683993 0.031180730
## 4: between_90_to_135 0.037253539 0.031358938 0.045367223
## 5: PG.13 0.036774978 0.019829630 0.035022905
## 6: Spring 0.036173097 0.019596970 0.040195064
## 7: Fall 0.033508871 0.013954377 0.035318457
## 8: Summer 0.031427423 0.035562043 0.043298360
## 9: R 0.028930734 0.019777284 0.029555194
## 10: Main_Comedy 0.028573480 0.014610605 0.030441850
## 11: PG 0.026235070 0.027202056 0.025565243
## 12: Greater_than_135 0.018806232 0.022655282 0.015664253
## 13: Main_Action 0.015085692 0.017014680 0.016846461
## 14: Main_Thriller 0.013917641 0.008931984 0.011230974
## 15: Main_Horror 0.013452731 0.010433989 0.014482045
## 16: Main_Crime 0.011096380 0.008038030 0.010344318
## 17: Main_Animation 0.008857662 0.023862927 0.011969854
## 18: G 0.008538327 0.029323989 0.008718782
## 19: Main_Documentary 0.005841090 0.025664225 0.009309886
## 20: Main_Fantasy 0.005778447 0.022974971 0.008275454
## 21: Main_Family 0.005533681 0.022995320 0.007388799
## 22: Main_Adventure 0.004910549 0.014002714 0.008571006
## 23: Main_Science_Fiction 0.004666787 0.006675019 0.005172159
## 24: Main_Mystery 0.002403996 0.013936794 0.005172159
## 25: Main_Romance 0.002317772 0.004623207 0.003398847
## 26: Main_History 0.002116693 0.005473093 0.003398847
## Feature Gain Cover Frequency
xgb.plot.importance(importance)
Feature Importance:
The most important feature is Log_production_budget_adj, contributing significantly to the model with a high Gain and Cover. Other top features include genre_count, Main_Drama, and between_90_to_135, indicating their relevance in predicting critic scores.
Model Training Details:
The XGBoost model is trained with multi-class classification (multi:softprob), effectively handling three critic score categories. Parameters like max_depth = 6, eta = 0.1, and colsample_bytree = 0.8 optimize the balance between overfitting and learning efficiency.
Observations from Results:
The Gain column measures the average gain of splits using a feature. The Frequency column reveals how often a feature was used for splits; for instance, Log_production_budget_adj has the highest usage frequency, reinforcing its importance.
Evaluating the XGBoost
# Prepare test data
x_test <- model.matrix(
Critic_score_category ~ Log_production_budget_adj + PG.13 + R + PG + G +
between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count +
Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime +
Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror +
Main_Mystery + Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,
data = test_data
)[, -1] # Remove intercept column
# Encode the true labels for comparison
true_classes <- factor(test_data$Critic_score_category, levels = levels(train_data$Critic_score_category))
# Predict class probabilities on test data
predicted_probabilities <- predict(xgb_model, as.matrix(x_test))
# Reshape predicted probabilities into a matrix (rows = instances, columns = classes)
predicted_probabilities <- matrix(
predicted_probabilities,
nrow = nrow(x_test),
byrow = TRUE
)
# Assign column names for interpretability
colnames(predicted_probabilities) <- levels(train_data$Critic_score_category)
# Predict classes based on maximum probability
predicted_classes <- factor(
apply(predicted_probabilities, 1, function(row) colnames(predicted_probabilities)[which.max(row)]),
levels = levels(train_data$Critic_score_category)
)
library(caret)
# Compute the confusion matrix
confusion_matrix <- confusionMatrix(predicted_classes, true_classes)
# Print the confusion matrix
print(confusion_matrix)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Moderate Popular Unpopular
## Moderate 34 23 19
## Popular 44 72 41
## Unpopular 33 25 65
##
## Overall Statistics
##
## Accuracy : 0.4803
## 95% CI : (0.4274, 0.5336)
## No Information Rate : 0.3511
## P-Value [Acc > NIR] : 3.692e-07
##
## Kappa : 0.2167
##
## Mcnemar's Test P-Value : 0.002608
##
## Statistics by Class:
##
## Class: Moderate Class: Popular Class: Unpopular
## Sensitivity 0.30631 0.6000 0.5200
## Specificity 0.82857 0.6398 0.7489
## Pos Pred Value 0.44737 0.4586 0.5285
## Neg Pred Value 0.72500 0.7588 0.7425
## Prevalence 0.31180 0.3371 0.3511
## Detection Rate 0.09551 0.2022 0.1826
## Detection Prevalence 0.21348 0.4410 0.3455
## Balanced Accuracy 0.56744 0.6199 0.6345
# Extract metrics
accuracy <- confusion_matrix$overall["Accuracy"]
sensitivity <- confusion_matrix$byClass[, "Sensitivity"]
specificity <- confusion_matrix$byClass[, "Specificity"]
# Print key metrics
cat("Accuracy:", accuracy, "\n")
## Accuracy: 0.4803371
cat("Sensitivity for each class:\n", sensitivity, "\n")
## Sensitivity for each class:
## 0.3063063 0.6 0.52
cat("Specificity for each class:\n", specificity, "\n")
## Specificity for each class:
## 0.8285714 0.6398305 0.7489177
library(pROC)
# Initialize lists for ROC and AUC
roc_list <- list()
auc_list <- list()
# Compute ROC and AUC for each class
for (category in levels(true_classes)) {
# Create binary labels: 1 for the current class, 0 for all others
true_binary <- ifelse(true_classes == category, 1, 0)
predicted_probs_binary <- predicted_probabilities[, category]
# Skip if binary labels have fewer than two levels
if (length(unique(true_binary)) < 2) {
cat("Skipping ROC for", category, "due to insufficient data.\n")
next
}
# Compute ROC
roc_obj <- roc(true_binary, predicted_probs_binary)
roc_list[[category]] <- roc_obj
auc_list[[category]] <- auc(roc_obj)
# Plot ROC curve
plot(roc_obj, main = paste("ROC Curve for", category), col = "blue")
abline(a = 0, b = 1, col = "gray", lty = 2) # Reference line
cat("AUC for", category, ":", auc_list[[category]], "\n")
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Moderate : 0.5807685
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Popular : 0.6732521
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Unpopular : 0.6639827
library(dplyr)
# Initialize a list to store Lift Tables for each class
lift_tables <- list()
# Loop through each class
for (category in levels(true_classes)) {
# Combine test data with predicted probabilities
test_data_category <- test_data %>%
mutate(predicted_prob = predicted_probabilities[, category])
# Sort by predicted probabilities for the current class
test_data_category <- test_data_category %>%
arrange(desc(predicted_prob))
# Add deciles
test_data_category$decile <- ntile(test_data_category$predicted_prob, 10)
# Calculate Lift Table
lift_table <- test_data_category %>%
group_by(decile) %>%
summarize(
total = n(),
events = sum(Critic_score_category == category),
cumulative_events = cumsum(events),
cumulative_percentage = cumulative_events / sum(Critic_score_category == category)
)
# Store the Lift Table
lift_tables[[category]] <- lift_table
# Plot Lift Chart for the current category
plot(
lift_table$decile, lift_table$cumulative_percentage,
type = "o", col = "blue", xlab = "Decile", ylab = "Cumulative Gain",
main = paste("Lift Chart for", category)
)
abline(a = 0, b = 0.1, col = "gray", lty = 2) # Reference line
}
Accuracy: 48.03%
Indicates that approximately 48% of the predictions made by the model are correct. While not particularly high, this accuracy reflects the challenges of the multi-class nature of the problem.
Sensitivity:
Moderate: 30.63% - The ability to correctly identify instances of the “Moderate” category is relatively low. Popular: 60.00% - The model is reasonably good at identifying “Popular” categories. Unpopular: 52.00% - Performs moderately well in detecting “Unpopular” films.
Specificity:
Moderate: 82.86% - High specificity indicates the model is good at recognizing when a film is not in the “Moderate” category. Popular: 63.98% - Slightly lower specificity for “Popular” categories. Unpopular: 74.89% - The model effectively identifies non-“Unpopular” films.
AUC (Area Under the ROC Curve):
Moderate: 0.5808 - Indicates poor to moderate performance for the “Moderate” category. Popular: 0.6733 - Fairly good discrimination for “Popular” films. Unpopular: 0.6640 - Moderate ability to distinguish “Unpopular” films.
Observations:
The model performs best for the “Popular” category, both in terms of sensitivity and specificity. “Moderate” films are the hardest to classify, as evidenced by the lower sensitivity and AUC. The overall performance is acceptable but leaves room for improvement, especially in accurately identifying the “Moderate” category.
polynonial
# Fit a polynomial logistic regression model
library(nnet)
polynomial_logistic_model <- multinom(
Critic_score_category ~ poly(Log_production_budget_adj, degree = 2) +
PG.13 + R + PG + G + between_90_to_135 + Greater_than_135 +
Spring + Summer + Fall + genre_count + Main_Action + Main_Adventure +
Main_Animation + Main_Comedy + Main_Crime + Main_Documentary +
Main_Drama + Main_Family + Main_Fantasy + Main_Horror + Main_Mystery + Main_History +
Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,
data = train_data,
maxit = 1000 # Increase maximum iterations if convergence is slow
)
## # weights: 87 (56 variable)
## initial value 909.650975
## iter 10 value 848.123498
## iter 20 value 830.462966
## iter 30 value 825.737888
## iter 40 value 825.353131
## iter 50 value 825.220748
## iter 60 value 825.158466
## iter 70 value 825.150546
## final value 825.150110
## converged
# View model summary
summary(polynomial_logistic_model)
## Call:
## multinom(formula = Critic_score_category ~ poly(Log_production_budget_adj,
## degree = 2) + PG.13 + R + PG + G + between_90_to_135 + Greater_than_135 +
## Spring + Summer + Fall + genre_count + Main_Action + Main_Adventure +
## Main_Animation + Main_Comedy + Main_Crime + Main_Documentary +
## Main_Drama + Main_Family + Main_Fantasy + Main_Horror + Main_Mystery +
## Main_History + Main_History + Main_Romance + Main_Science_Fiction +
## Main_Thriller, data = train_data, maxit = 1000)
##
## Coefficients:
## (Intercept) poly(Log_production_budget_adj, degree = 2)1
## Popular 11.565474 -11.094519
## Unpopular -2.333291 3.613498
## poly(Log_production_budget_adj, degree = 2)2 PG.13 R
## Popular 3.2002874 -12.634230 -12.483462
## Unpopular 0.7660846 4.843208 4.217229
## PG G between_90_to_135 Greater_than_135 Spring
## Popular -12.642653 -10.642985 0.1063207 0.4544483 -0.4819072
## Unpopular 4.833862 -7.730321 -0.8134503 -1.5856419 -0.3479529
## Summer Fall genre_count Main_Action Main_Adventure
## Popular -0.06116598 0.1720304 0.05304324 0.5042617 0.8005876
## Unpopular -0.73078344 -0.3815510 -0.10637888 -0.7136454 -1.2524360
## Main_Animation Main_Comedy Main_Crime Main_Documentary Main_Drama
## Popular 1.914293 1.050685 0.8261297 0.4381825 1.4331737
## Unpopular -1.846541 -0.791375 -0.7199814 -18.0163775 -0.7116571
## Main_Family Main_Fantasy Main_Horror Main_Mystery Main_History
## Popular 0.05035611 2.373883 0.4630853 0.7588602 1.055985
## Unpopular -2.79678478 1.171337 -0.4731329 0.1522330 -0.681507
## Main_Romance Main_Science_Fiction Main_Thriller
## Popular 1.0058449 1.3798434 0.7314751
## Unpopular -0.5297567 -0.1082246 -0.5628852
##
## Std. Errors:
## (Intercept) poly(Log_production_budget_adj, degree = 2)1
## Popular 1.0923972 3.430354
## Unpopular 0.7623691 3.809983
## poly(Log_production_budget_adj, degree = 2)2 PG.13 R
## Popular 3.281846 0.3943373 0.4002812
## Unpopular 3.668616 0.2871574 0.2914631
## PG G between_90_to_135 Greater_than_135 Spring
## Popular 0.4390837 1.010275e+00 0.3483848 0.5165330 0.2815779
## Unpopular 0.3560708 1.083521e-05 0.3283432 0.5638362 0.2605881
## Summer Fall genre_count Main_Action Main_Adventure
## Popular 0.2709708 0.2625506 0.1010428 1.2580273 1.3143030
## Unpopular 0.2750184 0.2645398 0.1025857 0.9071704 0.9861707
## Main_Animation Main_Comedy Main_Crime Main_Documentary Main_Drama
## Popular 1.391344 1.2537677 1.2863022 1.525179e+00 1.250117
## Unpopular 1.190353 0.9123376 0.9665375 2.590817e-06 0.909805
## Main_Family Main_Fantasy Main_Horror Main_Mystery Main_History
## Popular 1.528569 1.675151 1.2944481 1.59332 1.554693
## Unpopular 1.448801 1.383650 0.9541438 1.23109 1.359981
## Main_Romance Main_Science_Fiction Main_Thriller
## Popular 1.357800 1.430722 1.2833793
## Unpopular 1.030348 1.141132 0.9467002
##
## Residual Deviance: 1650.3
## AIC: 1762.3
Model Convergence:
The model converged successfully after 70 iterations with a final value of the residual deviance of 1650.3, and the Akaike Information Criterion (AIC) value is 1762.3. Lower AIC suggests that the model balances goodness of fit and model complexity, but in comparison to simpler models, this is still relatively high.
Key Coefficients:
Features like poly(Log_production_budget_adj, degree = 2), PG.13, and G exhibit significant influence across the target categories (Popular, Unpopular, Moderate). Genre-related features (e.g., Main_Comedy, Main_Family, Main_Drama) also have notable coefficients, particularly for the “Popular” category, indicating their positive association with high critic scores.
Effect of Polynomial Transformation:
The inclusion of a second-degree polynomial transformation for Log_production_budget_adj allows the model to capture non-linear relationships between production budget and critic score categories. A large negative coefficient for the first term and a positive coefficient for the squared term suggest a U-shaped relationship.
Feature Contributions:
Main_Fantasy, Main_Drama, and genre_count show strong associations for the “Popular” category. Negative coefficients for many features under the “Unpopular” category suggest their strong divergence from films with high critic scores.
Residual Deviance:
The residual deviance (1650.3) provides a measure of how well the model fits the training data, though it is high, indicating scope for further optimization or using more flexible models.
Evaluating polynomial
# Predict class probabilities on the test data
predicted_probabilities <- predict(polynomial_logistic_model, newdata = test_data, type = "probs")
# Predict classes on the test data
predicted_classes <- predict(polynomial_logistic_model, newdata = test_data, type = "class")
# Ensure true labels are factors with the correct levels
true_classes <- factor(test_data$Critic_score_category, levels = levels(train_data$Critic_score_category))
# Convert predicted classes to a factor with the same levels
predicted_classes <- factor(predicted_classes, levels = levels(train_data$Critic_score_category))
library(caret)
# Compute the confusion matrix
confusion_matrix <- confusionMatrix(predicted_classes, true_classes)
# Print the confusion matrix
print(confusion_matrix)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Moderate Popular Unpopular
## Moderate 19 12 15
## Popular 36 80 39
## Unpopular 56 28 71
##
## Overall Statistics
##
## Accuracy : 0.4775
## 95% CI : (0.4246, 0.5308)
## No Information Rate : 0.3511
## P-Value [Acc > NIR] : 6.366e-07
##
## Kappa : 0.2085
##
## Mcnemar's Test P-Value : 3.638e-08
##
## Statistics by Class:
##
## Class: Moderate Class: Popular Class: Unpopular
## Sensitivity 0.17117 0.6667 0.5680
## Specificity 0.88980 0.6822 0.6364
## Pos Pred Value 0.41304 0.5161 0.4581
## Neg Pred Value 0.70323 0.8010 0.7313
## Prevalence 0.31180 0.3371 0.3511
## Detection Rate 0.05337 0.2247 0.1994
## Detection Prevalence 0.12921 0.4354 0.4354
## Balanced Accuracy 0.53048 0.6744 0.6022
# Extract key metrics
accuracy <- confusion_matrix$overall["Accuracy"]
sensitivity <- confusion_matrix$byClass[, "Sensitivity"]
specificity <- confusion_matrix$byClass[, "Specificity"]
# Print metrics
cat("Accuracy:", accuracy, "\n")
## Accuracy: 0.4775281
cat("Sensitivity for each class:\n", sensitivity, "\n")
## Sensitivity for each class:
## 0.1711712 0.6666667 0.568
cat("Specificity for each class:\n", specificity, "\n")
## Specificity for each class:
## 0.8897959 0.6822034 0.6363636
library(pROC)
# Initialize lists for ROC and AUC
roc_list <- list()
auc_list <- list()
# Compute ROC and AUC for each class
for (category in levels(true_classes)) {
# Create binary labels: 1 for the current class, 0 for all others
true_binary <- ifelse(true_classes == category, 1, 0)
# Get predicted probabilities for the current class
predicted_probs_binary <- predicted_probabilities[, category]
# Skip if binary labels have fewer than two levels
if (length(unique(true_binary)) < 2) {
cat("Skipping ROC for", category, "due to insufficient data.\n")
next
}
# Compute ROC
roc_obj <- roc(true_binary, predicted_probs_binary)
roc_list[[category]] <- roc_obj
auc_list[[category]] <- auc(roc_obj)
# Plot ROC curve
plot(roc_obj, main = paste("ROC Curve for", category), col = "blue")
abline(a = 0, b = 1, col = "gray", lty = 2) # Reference line
cat("AUC for", category, ":", auc_list[[category]], "\n")
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Moderate : 0.5812466
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Popular : 0.7346751
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Unpopular : 0.6522251
library(dplyr)
# Initialize a list to store Lift Tables for each class
lift_tables <- list()
# Loop through each class
for (category in levels(true_classes)) {
# Combine test data with predicted probabilities
test_data_category <- test_data %>%
mutate(predicted_prob = predicted_probabilities[, category])
# Sort by predicted probabilities for the current class
test_data_category <- test_data_category %>%
arrange(desc(predicted_prob))
# Add deciles
test_data_category$decile <- ntile(test_data_category$predicted_prob, 10)
# Calculate Lift Table
lift_table <- test_data_category %>%
group_by(decile) %>%
summarize(
total = n(),
events = sum(Critic_score_category == category),
cumulative_events = cumsum(events),
cumulative_percentage = cumulative_events / sum(Critic_score_category == category)
)
# Store the Lift Table
lift_tables[[category]] <- lift_table
# Plot Lift Chart for the current category
plot(
lift_table$decile, lift_table$cumulative_percentage,
type = "o", col = "blue", xlab = "Decile", ylab = "Cumulative Gain",
main = paste("Lift Chart for", category)
)
abline(a = 0, b = 0.1, col = "gray", lty = 2) # Reference line
}
Overall Accuracy:
The model achieved an accuracy of 47.75% on the test data, slightly higher than the no-information rate (35.11%), but it indicates there is still significant room for improvement.
Sensitivity (Recall):
Moderate: 17.12% Popular: 66.67% Unpopular: 56.80% The model performs well in identifying “Popular” movies but struggles with “Moderate” and “Unpopular” categories.
Specificity:
Moderate: 88.98% Popular: 68.22% Unpopular: 63.64% High specificity for “Moderate” implies the model rarely misclassifies other categories as “Moderate.”
AUC (Area Under ROC Curve):
Moderate: 0.5812 Popular: 0.7347 Unpopular: 0.6522 The “Popular” category demonstrates the highest AUC, indicating the model has good discriminatory power for this class.
Balanced Accuracy:
Combines sensitivity and specificity, reflecting how well the model distinguishes each class: Moderate: 53.05% Popular: 67.44% Unpopular: 60.22%
Decision tree
# Load required libraries
library(rpart)
library(rpart.plot)
# Train Decision Tree for Classification
dt_model_categorical <- rpart(
Critic_score_category ~ Log_production_budget_adj + PG.13 + R + PG + G +
between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count +
Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime +
Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror +
Main_Mystery + Main_Romance + Main_Science_Fiction + Main_Thriller,
data = train_data,
method = "class", # Classification tree
control = rpart.control(
cp = 0.005, # Smaller complexity parameter for more splits
maxdepth = 10, # Allow deeper trees
minsplit = 10 # Minimum observations required to split
)
)
# Plot the decision tree
rpart.plot(
dt_model_categorical,
type = 3, # Show splits and probabilities
extra = 101, # Display n, % observations, and class probabilities
under = TRUE, # Show text under the nodes
fallen.leaves = TRUE, # Spread the leaves horizontally
box.palette = "Blues" # Color scheme for the boxes
)
# Print a summary of the model
summary(dt_model_categorical)
## Call:
## rpart(formula = Critic_score_category ~ Log_production_budget_adj +
## PG.13 + R + PG + G + between_90_to_135 + Greater_than_135 +
## Spring + Summer + Fall + genre_count + Main_Action + Main_Adventure +
## Main_Animation + Main_Comedy + Main_Crime + Main_Documentary +
## Main_Drama + Main_Family + Main_Fantasy + Main_Horror + Main_Mystery +
## Main_Romance + Main_Science_Fiction + Main_Thriller, data = train_data,
## method = "class", control = rpart.control(cp = 0.005, maxdepth = 10,
## minsplit = 10))
## n= 828
##
## CP nsplit rel error xerror xstd
## 1 0.096339114 0 1.0000000 1.0000000 0.02681516
## 2 0.013487476 1 0.9036609 0.9229287 0.02737776
## 3 0.010597303 4 0.8593449 0.9479769 0.02722511
## 4 0.007707129 6 0.8381503 0.9479769 0.02722511
## 5 0.006743738 8 0.8227360 0.9383430 0.02728721
## 6 0.006262042 10 0.8092486 0.9364162 0.02729912
## 7 0.005780347 14 0.7842004 0.9287091 0.02734507
## 8 0.005000000 20 0.7495183 0.9229287 0.02737776
##
## Variable importance
## Log_production_budget_adj PG.13 R
## 32 14 12
## Main_Drama genre_count PG
## 8 7 6
## Fall Main_Comedy Spring
## 4 3 3
## G Summer Main_Horror
## 3 2 1
## Main_Documentary
## 1
##
## Node number 1: 828 observations, complexity param=0.09633911
## predicted class=Popular expected loss=0.6268116 P(node) =1
## class counts: 242 309 277
## probabilities: 0.292 0.373 0.335
## left son=2 (325 obs) right son=3 (503 obs)
## Primary splits:
## Log_production_budget_adj < 16.72627 to the left, improve=12.969260, (0 missing)
## Main_Drama < 0.5 to the right, improve= 9.259724, (0 missing)
## PG.13 < 0.5 to the left, improve= 8.957151, (0 missing)
## R < 0.5 to the right, improve= 8.756927, (0 missing)
## Main_Action < 0.5 to the right, improve= 5.618857, (0 missing)
## Surrogate splits:
## Main_Horror < 0.5 to the right, agree=0.636, adj=0.074, (0 split)
## genre_count < 1.5 to the left, agree=0.616, adj=0.022, (0 split)
## Main_Documentary < 0.5 to the right, agree=0.614, adj=0.015, (0 split)
## Main_Drama < 0.5 to the right, agree=0.609, adj=0.003, (0 split)
##
## Node number 2: 325 observations, complexity param=0.006743738
## predicted class=Popular expected loss=0.5046154 P(node) =0.3925121
## class counts: 85 161 79
## probabilities: 0.262 0.495 0.243
## left son=4 (199 obs) right son=5 (126 obs)
## Primary splits:
## R < 0.5 to the right, improve=4.805881, (0 missing)
## Log_production_budget_adj < 14.67788 to the right, improve=4.633759, (0 missing)
## Main_Drama < 0.5 to the right, improve=4.055589, (0 missing)
## PG < 0.5 to the right, improve=3.914004, (0 missing)
## PG.13 < 0.5 to the left, improve=2.785738, (0 missing)
## Surrogate splits:
## PG.13 < 0.5 to the left, agree=0.942, adj=0.849, (0 split)
## PG < 0.5 to the left, agree=0.662, adj=0.127, (0 split)
## Main_Documentary < 0.5 to the left, agree=0.631, adj=0.048, (0 split)
## Log_production_budget_adj < 16.64124 to the left, agree=0.625, adj=0.032, (0 split)
## G < 0.5 to the left, agree=0.618, adj=0.016, (0 split)
##
## Node number 3: 503 observations, complexity param=0.01348748
## predicted class=Unpopular expected loss=0.6063618 P(node) =0.6074879
## class counts: 157 148 198
## probabilities: 0.312 0.294 0.394
## left son=6 (285 obs) right son=7 (218 obs)
## Primary splits:
## PG.13 < 0.5 to the left, improve=4.575871, (0 missing)
## Greater_than_135 < 0.5 to the right, improve=3.624457, (0 missing)
## Spring < 0.5 to the left, improve=3.454015, (0 missing)
## R < 0.5 to the right, improve=3.228630, (0 missing)
## Fall < 0.5 to the right, improve=3.111938, (0 missing)
## Surrogate splits:
## R < 0.5 to the right, agree=0.829, adj=0.606, (0 split)
## PG < 0.5 to the right, agree=0.596, adj=0.069, (0 split)
## Log_production_budget_adj < 18.53217 to the left, agree=0.586, adj=0.046, (0 split)
## Main_Action < 0.5 to the left, agree=0.579, adj=0.028, (0 split)
## Main_Romance < 0.5 to the left, agree=0.569, adj=0.005, (0 split)
##
## Node number 4: 199 observations
## predicted class=Popular expected loss=0.4271357 P(node) =0.2403382
## class counts: 47 114 38
## probabilities: 0.236 0.573 0.191
##
## Node number 5: 126 observations, complexity param=0.006743738
## predicted class=Popular expected loss=0.6269841 P(node) =0.1521739
## class counts: 38 47 41
## probabilities: 0.302 0.373 0.325
## left son=10 (16 obs) right son=11 (110 obs)
## Primary splits:
## PG < 0.5 to the right, improve=2.378030, (0 missing)
## Log_production_budget_adj < 14.55532 to the left, improve=1.814943, (0 missing)
## PG.13 < 0.5 to the left, improve=1.807345, (0 missing)
## Summer < 0.5 to the right, improve=1.668089, (0 missing)
## Main_Thriller < 0.5 to the left, improve=1.256322, (0 missing)
## Surrogate splits:
## PG.13 < 0.5 to the left, agree=0.976, adj=0.812, (0 split)
## Main_Family < 0.5 to the right, agree=0.889, adj=0.125, (0 split)
##
## Node number 6: 285 observations, complexity param=0.01348748
## predicted class=Moderate expected loss=0.6631579 P(node) =0.3442029
## class counts: 96 96 93
## probabilities: 0.337 0.337 0.326
## left son=12 (265 obs) right son=13 (20 obs)
## Primary splits:
## Log_production_budget_adj < 18.68043 to the left, improve=3.441212, (0 missing)
## genre_count < 1.5 to the right, improve=2.924045, (0 missing)
## G < 0.5 to the left, improve=2.676456, (0 missing)
## Greater_than_135 < 0.5 to the right, improve=2.362950, (0 missing)
## Spring < 0.5 to the left, improve=1.496491, (0 missing)
##
## Node number 7: 218 observations, complexity param=0.007707129
## predicted class=Unpopular expected loss=0.5183486 P(node) =0.263285
## class counts: 61 52 105
## probabilities: 0.280 0.239 0.482
## left son=14 (51 obs) right son=15 (167 obs)
## Primary splits:
## Fall < 0.5 to the right, improve=3.037609, (0 missing)
## Log_production_budget_adj < 17.9594 to the right, improve=2.619436, (0 missing)
## Main_Drama < 0.5 to the right, improve=2.382238, (0 missing)
## genre_count < 3.5 to the left, improve=1.978286, (0 missing)
## Main_Crime < 0.5 to the right, improve=1.567336, (0 missing)
##
## Node number 10: 16 observations
## predicted class=Moderate expected loss=0.4375 P(node) =0.01932367
## class counts: 9 2 5
## probabilities: 0.562 0.125 0.312
##
## Node number 11: 110 observations, complexity param=0.005780347
## predicted class=Popular expected loss=0.5909091 P(node) =0.1328502
## class counts: 29 45 36
## probabilities: 0.264 0.409 0.327
## left son=22 (9 obs) right son=23 (101 obs)
## Primary splits:
## Log_production_budget_adj < 14.55532 to the left, improve=1.7477950, (0 missing)
## Summer < 0.5 to the right, improve=1.7295280, (0 missing)
## Main_Drama < 0.5 to the right, improve=1.6392590, (0 missing)
## Main_Thriller < 0.5 to the left, improve=1.3436360, (0 missing)
## Main_Documentary < 0.5 to the right, improve=0.7108062, (0 missing)
##
## Node number 12: 265 observations, complexity param=0.01348748
## predicted class=Moderate expected loss=0.645283 P(node) =0.3200483
## class counts: 94 83 88
## probabilities: 0.355 0.313 0.332
## left son=24 (236 obs) right son=25 (29 obs)
## Primary splits:
## genre_count < 1.5 to the right, improve=2.357373, (0 missing)
## Greater_than_135 < 0.5 to the right, improve=2.303430, (0 missing)
## G < 0.5 to the left, improve=2.147660, (0 missing)
## Main_Drama < 0.5 to the left, improve=1.636870, (0 missing)
## Spring < 0.5 to the left, improve=1.509204, (0 missing)
##
## Node number 13: 20 observations
## predicted class=Popular expected loss=0.35 P(node) =0.02415459
## class counts: 2 13 5
## probabilities: 0.100 0.650 0.250
##
## Node number 14: 51 observations, complexity param=0.007707129
## predicted class=Popular expected loss=0.6078431 P(node) =0.0615942
## class counts: 14 20 17
## probabilities: 0.275 0.392 0.333
## left son=28 (21 obs) right son=29 (30 obs)
## Primary splits:
## Main_Drama < 0.5 to the right, improve=1.9232490, (0 missing)
## genre_count < 3.5 to the left, improve=1.8248370, (0 missing)
## Log_production_budget_adj < 18.06355 to the left, improve=1.2516040, (0 missing)
## Greater_than_135 < 0.5 to the right, improve=0.6053922, (0 missing)
## Main_Action < 0.5 to the right, improve=0.3153515, (0 missing)
## Surrogate splits:
## genre_count < 2.5 to the left, agree=0.686, adj=0.238, (0 split)
## Log_production_budget_adj < 17.84238 to the left, agree=0.667, adj=0.190, (0 split)
## Greater_than_135 < 0.5 to the right, agree=0.608, adj=0.048, (0 split)
## Main_Action < 0.5 to the left, agree=0.608, adj=0.048, (0 split)
##
## Node number 15: 167 observations
## predicted class=Unpopular expected loss=0.4730539 P(node) =0.2016908
## class counts: 47 32 88
## probabilities: 0.281 0.192 0.527
##
## Node number 22: 9 observations
## predicted class=Popular expected loss=0.3333333 P(node) =0.01086957
## class counts: 3 6 0
## probabilities: 0.333 0.667 0.000
##
## Node number 23: 101 observations, complexity param=0.005780347
## predicted class=Popular expected loss=0.6138614 P(node) =0.1219807
## class counts: 26 39 36
## probabilities: 0.257 0.386 0.356
## left son=46 (20 obs) right son=47 (81 obs)
## Primary splits:
## Summer < 0.5 to the right, improve=1.2812740, (0 missing)
## Main_Drama < 0.5 to the right, improve=1.2598700, (0 missing)
## Log_production_budget_adj < 14.98777 to the right, improve=1.2034760, (0 missing)
## Main_Thriller < 0.5 to the left, improve=1.0795780, (0 missing)
## Main_Documentary < 0.5 to the right, improve=0.9192429, (0 missing)
## Surrogate splits:
## PG.13 < 0.5 to the left, agree=0.812, adj=0.05, (0 split)
##
## Node number 24: 236 observations, complexity param=0.0105973
## predicted class=Moderate expected loss=0.6313559 P(node) =0.2850242
## class counts: 87 77 72
## probabilities: 0.369 0.326 0.305
## left son=48 (233 obs) right son=49 (3 obs)
## Primary splits:
## G < 0.5 to the left, improve=2.075035, (0 missing)
## Main_Drama < 0.5 to the left, improve=1.654108, (0 missing)
## Greater_than_135 < 0.5 to the right, improve=1.330858, (0 missing)
## Log_production_budget_adj < 18.58293 to the left, improve=1.295880, (0 missing)
## genre_count < 2.5 to the left, improve=1.167829, (0 missing)
##
## Node number 25: 29 observations
## predicted class=Unpopular expected loss=0.4482759 P(node) =0.03502415
## class counts: 7 6 16
## probabilities: 0.241 0.207 0.552
##
## Node number 28: 21 observations
## predicted class=Popular expected loss=0.4285714 P(node) =0.02536232
## class counts: 5 12 4
## probabilities: 0.238 0.571 0.190
##
## Node number 29: 30 observations
## predicted class=Unpopular expected loss=0.5666667 P(node) =0.03623188
## class counts: 9 8 13
## probabilities: 0.300 0.267 0.433
##
## Node number 46: 20 observations
## predicted class=Popular expected loss=0.45 P(node) =0.02415459
## class counts: 5 11 4
## probabilities: 0.250 0.550 0.200
##
## Node number 47: 81 observations, complexity param=0.005780347
## predicted class=Unpopular expected loss=0.6049383 P(node) =0.09782609
## class counts: 21 28 32
## probabilities: 0.259 0.346 0.395
## left son=94 (57 obs) right son=95 (24 obs)
## Primary splits:
## Main_Drama < 0.5 to the left, improve=2.0547430, (0 missing)
## Log_production_budget_adj < 15.64706 to the right, improve=1.3793050, (0 missing)
## Main_Documentary < 0.5 to the right, improve=1.0037990, (0 missing)
## between_90_to_135 < 0.5 to the right, improve=0.7613285, (0 missing)
## Main_Thriller < 0.5 to the left, improve=0.7412346, (0 missing)
##
## Node number 48: 233 observations, complexity param=0.0105973
## predicted class=Moderate expected loss=0.6266094 P(node) =0.281401
## class counts: 87 74 72
## probabilities: 0.373 0.318 0.309
## left son=96 (188 obs) right son=97 (45 obs)
## Primary splits:
## Main_Drama < 0.5 to the left, improve=1.870095, (0 missing)
## Greater_than_135 < 0.5 to the right, improve=1.409792, (0 missing)
## Log_production_budget_adj < 18.00082 to the left, improve=1.339468, (0 missing)
## Spring < 0.5 to the left, improve=1.245670, (0 missing)
## genre_count < 2.5 to the left, improve=1.102906, (0 missing)
## Surrogate splits:
## Log_production_budget_adj < 16.77728 to the right, agree=0.815, adj=0.044, (0 split)
##
## Node number 49: 3 observations
## predicted class=Popular expected loss=0 P(node) =0.003623188
## class counts: 0 3 0
## probabilities: 0.000 1.000 0.000
##
## Node number 94: 57 observations
## predicted class=Unpopular expected loss=0.5789474 P(node) =0.06884058
## class counts: 18 15 24
## probabilities: 0.316 0.263 0.421
##
## Node number 95: 24 observations, complexity param=0.005780347
## predicted class=Popular expected loss=0.4583333 P(node) =0.02898551
## class counts: 3 13 8
## probabilities: 0.125 0.542 0.333
## left son=190 (14 obs) right son=191 (10 obs)
## Primary splits:
## Log_production_budget_adj < 16.11692 to the right, improve=2.2309520, (0 missing)
## genre_count < 2.5 to the left, improve=0.6745614, (0 missing)
## Spring < 0.5 to the right, improve=0.4055556, (0 missing)
## Fall < 0.5 to the left, improve=0.4023810, (0 missing)
## Surrogate splits:
## Spring < 0.5 to the left, agree=0.625, adj=0.1, (0 split)
## genre_count < 2.5 to the left, agree=0.625, adj=0.1, (0 split)
##
## Node number 96: 188 observations, complexity param=0.006262042
## predicted class=Moderate expected loss=0.606383 P(node) =0.2270531
## class counts: 74 53 61
## probabilities: 0.394 0.282 0.324
## left son=192 (48 obs) right son=193 (140 obs)
## Primary splits:
## genre_count < 2.5 to the left, improve=1.999012, (0 missing)
## Log_production_budget_adj < 17.21913 to the left, improve=1.761271, (0 missing)
## Greater_than_135 < 0.5 to the right, improve=1.123146, (0 missing)
## R < 0.5 to the right, improve=0.974878, (0 missing)
## PG < 0.5 to the left, improve=0.974878, (0 missing)
## Surrogate splits:
## Log_production_budget_adj < 16.82575 to the left, agree=0.755, adj=0.042, (0 split)
## Main_Family < 0.5 to the right, agree=0.750, adj=0.021, (0 split)
## Main_Horror < 0.5 to the right, agree=0.750, adj=0.021, (0 split)
## Main_Mystery < 0.5 to the right, agree=0.750, adj=0.021, (0 split)
##
## Node number 97: 45 observations
## predicted class=Popular expected loss=0.5333333 P(node) =0.05434783
## class counts: 13 21 11
## probabilities: 0.289 0.467 0.244
##
## Node number 190: 14 observations
## predicted class=Popular expected loss=0.2857143 P(node) =0.01690821
## class counts: 2 10 2
## probabilities: 0.143 0.714 0.143
##
## Node number 191: 10 observations
## predicted class=Unpopular expected loss=0.4 P(node) =0.01207729
## class counts: 1 3 6
## probabilities: 0.100 0.300 0.600
##
## Node number 192: 48 observations
## predicted class=Moderate expected loss=0.5 P(node) =0.05797101
## class counts: 24 15 9
## probabilities: 0.500 0.312 0.187
##
## Node number 193: 140 observations, complexity param=0.006262042
## predicted class=Unpopular expected loss=0.6285714 P(node) =0.1690821
## class counts: 50 38 52
## probabilities: 0.357 0.271 0.371
## left son=386 (27 obs) right son=387 (113 obs)
## Primary splits:
## Log_production_budget_adj < 17.2662 to the left, improve=1.0255930, (0 missing)
## Greater_than_135 < 0.5 to the right, improve=1.0130130, (0 missing)
## Spring < 0.5 to the left, improve=0.9731251, (0 missing)
## Main_Fantasy < 0.5 to the left, improve=0.9407563, (0 missing)
## genre_count < 5.5 to the left, improve=0.6700035, (0 missing)
##
## Node number 386: 27 observations, complexity param=0.005780347
## predicted class=Moderate expected loss=0.5185185 P(node) =0.0326087
## class counts: 13 4 10
## probabilities: 0.481 0.148 0.370
## left son=772 (24 obs) right son=773 (3 obs)
## Primary splits:
## Spring < 0.5 to the left, improve=2.1944440, (0 missing)
## Log_production_budget_adj < 16.87818 to the left, improve=1.4444440, (0 missing)
## between_90_to_135 < 0.5 to the left, improve=0.4444444, (0 missing)
## Main_Thriller < 0.5 to the left, improve=0.3015873, (0 missing)
## R < 0.5 to the right, improve=0.1835749, (0 missing)
##
## Node number 387: 113 observations, complexity param=0.006262042
## predicted class=Unpopular expected loss=0.6283186 P(node) =0.1364734
## class counts: 37 34 42
## probabilities: 0.327 0.301 0.372
## left son=774 (24 obs) right son=775 (89 obs)
## Primary splits:
## Log_production_budget_adj < 18.32308 to the right, improve=1.5011770, (0 missing)
## Main_Fantasy < 0.5 to the left, improve=0.9937891, (0 missing)
## Greater_than_135 < 0.5 to the right, improve=0.8847240, (0 missing)
## Main_Adventure < 0.5 to the right, improve=0.8754166, (0 missing)
## Spring < 0.5 to the right, improve=0.6983541, (0 missing)
##
## Node number 772: 24 observations
## predicted class=Moderate expected loss=0.4583333 P(node) =0.02898551
## class counts: 13 4 7
## probabilities: 0.542 0.167 0.292
##
## Node number 773: 3 observations
## predicted class=Unpopular expected loss=0 P(node) =0.003623188
## class counts: 0 0 3
## probabilities: 0.000 0.000 1.000
##
## Node number 774: 24 observations, complexity param=0.005780347
## predicted class=Moderate expected loss=0.5 P(node) =0.02898551
## class counts: 12 4 8
## probabilities: 0.500 0.167 0.333
## left son=1548 (21 obs) right son=1549 (3 obs)
## Primary splits:
## Main_Comedy < 0.5 to the left, improve=2.4761900, (0 missing)
## between_90_to_135 < 0.5 to the right, improve=1.3333330, (0 missing)
## Log_production_budget_adj < 18.58293 to the left, improve=1.2666670, (0 missing)
## Greater_than_135 < 0.5 to the left, improve=0.8666667, (0 missing)
## Main_Action < 0.5 to the right, improve=0.8561404, (0 missing)
##
## Node number 775: 89 observations, complexity param=0.006262042
## predicted class=Unpopular expected loss=0.6179775 P(node) =0.1074879
## class counts: 25 30 34
## probabilities: 0.281 0.337 0.382
## left son=1550 (60 obs) right son=1551 (29 obs)
## Primary splits:
## R < 0.5 to the right, improve=1.4787030, (0 missing)
## PG < 0.5 to the left, improve=1.4787030, (0 missing)
## Log_production_budget_adj < 18.0047 to the left, improve=1.3404570, (0 missing)
## Greater_than_135 < 0.5 to the right, improve=0.9234633, (0 missing)
## Main_Adventure < 0.5 to the right, improve=0.8918366, (0 missing)
## Surrogate splits:
## PG < 0.5 to the left, agree=1.000, adj=1.000, (0 split)
## Main_Animation < 0.5 to the left, agree=0.753, adj=0.241, (0 split)
## Main_Adventure < 0.5 to the left, agree=0.742, adj=0.207, (0 split)
## Log_production_budget_adj < 17.86695 to the left, agree=0.708, adj=0.103, (0 split)
## between_90_to_135 < 0.5 to the right, agree=0.708, adj=0.103, (0 split)
##
## Node number 1548: 21 observations
## predicted class=Moderate expected loss=0.4285714 P(node) =0.02536232
## class counts: 12 4 5
## probabilities: 0.571 0.190 0.238
##
## Node number 1549: 3 observations
## predicted class=Unpopular expected loss=0 P(node) =0.003623188
## class counts: 0 0 3
## probabilities: 0.000 0.000 1.000
##
## Node number 1550: 60 observations
## predicted class=Unpopular expected loss=0.6 P(node) =0.07246377
## class counts: 20 16 24
## probabilities: 0.333 0.267 0.400
##
## Node number 1551: 29 observations
## predicted class=Popular expected loss=0.5172414 P(node) =0.03502415
## class counts: 5 14 10
## probabilities: 0.172 0.483 0.345
Root Node: The tree starts with a split on Log_production_budget_adj. This means the production budget is the most influential feature in determining the critic score categories.
Splits: The tree branches out based on feature thresholds, such as PG.13, genre_count, Summer, and other attributes. These splits aim to separate the data into more homogeneous groups concerning the target categories (e.g., “Moderate,” “Popular,” “Unpopular”).
Leaf Nodes: At the ends of the branches, leaf nodes show the predicted category and the proportion of data points in each category (e.g., 50% Popular, 30% Unpopular). These nodes represent the final decision for a given set of feature values.
Feature Importance: Features such as Log_production_budget_adj, PG.13, and genre_count appear frequently near the top, indicating their importance in predicting critic scores.
General Structure: The tree uses thresholds to segment the data into subgroups, attempting to classify observations as accurately as possible while maintaining interpretability.
Evaluation of decision tree
# Predict class probabilities on test data
predicted_probabilities <- predict(dt_model_categorical, newdata = test_data, type = "prob")
# Predict classes on test data
predicted_classes <- predict(dt_model_categorical, newdata = test_data, type = "class")
# Ensure true labels are factors
true_classes <- factor(test_data$Critic_score_category, levels = levels(train_data$Critic_score_category))
# Convert predicted classes to factors
predicted_classes <- factor(predicted_classes, levels = levels(train_data$Critic_score_category))
library(caret)
# Compute confusion matrix
confusion_matrix <- confusionMatrix(predicted_classes, true_classes)
# Print the confusion matrix
print(confusion_matrix)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Moderate Popular Unpopular
## Moderate 19 17 17
## Popular 40 66 30
## Unpopular 52 37 78
##
## Overall Statistics
##
## Accuracy : 0.4579
## 95% CI : (0.4052, 0.5112)
## No Information Rate : 0.3511
## P-Value [Acc > NIR] : 2.130e-05
##
## Kappa : 0.1787
##
## Mcnemar's Test P-Value : 4.068e-06
##
## Statistics by Class:
##
## Class: Moderate Class: Popular Class: Unpopular
## Sensitivity 0.17117 0.5500 0.6240
## Specificity 0.86122 0.7034 0.6147
## Pos Pred Value 0.35849 0.4853 0.4671
## Neg Pred Value 0.69637 0.7545 0.7513
## Prevalence 0.31180 0.3371 0.3511
## Detection Rate 0.05337 0.1854 0.2191
## Detection Prevalence 0.14888 0.3820 0.4691
## Balanced Accuracy 0.51620 0.6267 0.6194
library(pROC)
# Initialize lists for ROC and AUC
roc_list <- list()
auc_list <- list()
# Compute ROC and AUC for each class
for (category in levels(true_classes)) {
# Create binary labels: 1 for the current class, 0 for all others
true_binary <- ifelse(true_classes == category, 1, 0)
# Get predicted probabilities for the current class
predicted_probs_binary <- predicted_probabilities[, category]
# Skip if true_binary has fewer than two levels
if (length(unique(true_binary)) < 2) {
cat("Skipping ROC for", category, "due to insufficient data.\n")
next
}
# Compute ROC
roc_obj <- roc(true_binary, predicted_probs_binary)
roc_list[[category]] <- roc_obj
auc_list[[category]] <- auc(roc_obj)
# Plot ROC curve
plot(roc_obj, main = paste("ROC Curve for", category), col = "blue")
abline(a = 0, b = 1, col = "gray", lty = 2) # Reference line
cat("AUC for", category, ":", auc_list[[category]], "\n")
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Moderate : 0.50467
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Popular : 0.6490643
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Unpopular : 0.6117056
library(dplyr)
# Initialize a list to store Lift Tables for each class
lift_tables <- list()
# Loop through each class
for (category in levels(true_classes)) {
# Combine test data with predicted probabilities
test_data_category <- test_data %>%
mutate(predicted_prob = predicted_probabilities[, category])
# Sort by predicted probabilities for the current class
test_data_category <- test_data_category %>%
arrange(desc(predicted_prob))
# Add deciles
test_data_category$decile <- ntile(test_data_category$predicted_prob, 10)
# Calculate Lift Table
lift_table <- test_data_category %>%
group_by(decile) %>%
summarize(
total = n(),
events = sum(Critic_score_category == category),
cumulative_events = cumsum(events),
cumulative_percentage = cumulative_events / sum(Critic_score_category == category)
)
# Store the Lift Table
lift_tables[[category]] <- lift_table
# Plot Lift Chart for the current category
plot(
lift_table$decile, lift_table$cumulative_percentage,
type = "o", col = "blue", xlab = "Decile", ylab = "Cumulative Gain",
main = paste("Lift Chart for", category)
)
abline(a = 0, b = 0.1, col = "gray", lty = 2) # Reference line
}
The evaluation metrics for the Decision Tree model reveal its performance for classifying the “Critic_Score_Category” target variable. Below is the summary:
Overall Accuracy:
The accuracy of the model is 45.79%, which indicates that approximately 46% of predictions match the actual classes.
Class-wise Sensitivity:
Moderate: Sensitivity is 17.12%, indicating a low ability to identify this class. Popular: Sensitivity is 55.00%, showing moderate success in identifying this class. Unpopular: Sensitivity is 62.40%, performing relatively well for this class.
Class-wise Specificity:
Moderate: Specificity is 86.12%, meaning the model is good at excluding instances that are not “Moderate.” Popular: Specificity is 70.34%, showing it is decent at excluding non-“Popular” cases. Unpopular: Specificity is 61.47%, performing adequately for this class.
AUC (Area Under the ROC Curve):
Moderate: AUC is 0.5047, indicating random performance for this class. Popular: AUC is 0.6491, indicating moderate discrimination ability. Unpopular: AUC is 0.6117, suggesting fair discrimination for this class.
For optimizing film investments based on Critic_Score_Category, the best model is XGBoost.
Why? High Accuracy: XGBoost offers the most robust predictive performance compared to other models, ensuring better categorization of films into Popular, Moderate, or Unpopular categories. Class Handling: It excels in managing imbalanced datasets, critical for avoiding missed predictions in key categories like Popular (which are vital for investment decisions). Feature Interactions: XGBoost’s ability to capture complex interactions between features like budget, genre, and seasonal release ensures nuanced predictions that align with film investment goals. Scalability: Its efficient computation allows handling large datasets, making it scalable for future predictions as more data becomes available. XGBoost is the most suitable model for your goal of maximizing returns on film investments.
Verified Significant Variables:
From the Random Forest model evaluation:
Log_production_budget_adj: A dominant predictor; indicates that budget heavily influences the IMDB category. PG.13 and R: Age ratings significantly impact categorization, reflecting audience segmentation. Genre_count: Diversity in genres is an essential factor. Main_Drama: Drama as the primary genre has a consistent impact. Between_90_to_135: Movie duration in this range strongly affects categorization. Seasonal Variables (Spring, Summer, Fall): Timing of releases is important for success. These variables align with the highest MeanDecreaseAccuracy and MeanDecreaseGini metrics observed in the Random Forest model for IMDB_Category.
Verified Significant Variables:
From the XGBoost model evaluation:
Log_production_budget_adj: The most crucial variable across all evaluation metrics, showing its significant effect on critic scores. Genre_count: Indicates how the diversity of genres impacts critics’ evaluations. Main_Drama: The drama genre is consistently favored by critics. Between_90_to_135: Movies in this duration range are favored. PG.13, R, and G: Age ratings are significant predictors, reflecting audience targeting and content alignment with critics’ preferences. Seasonal Variables (Spring, Fall, Summer): Timing strongly influences critic perceptions. These variables are supported by their high importance scores (gain, cover, and frequency) in the XGBoost model.
Verified Significant Variables:
From the XGBoost model evaluation:
Log_production_budget_adj: Consistently the most important variable, as it directly affects revenue. Main_Action, Main_Adventure, Main_Comedy: These genres dominate revenue performance. Genre_count: A diverse set of genres correlates with higher revenue. Between_90_to_135 and Greater_than_135: Longer movie durations are critical for maximizing revenue. Seasonal Variables (Spring, Summer): Timing affects box office success. PG.13 and R: Age ratings are key to targeting the right audience for maximum revenue. The feature importance scores in XGBoost strongly support these variables.
We are taking Worldwide gross = XGBoost IMDbrating = Random forest Critic score = XGBoost
library(caret)
# Define a 10-fold cross-validation
train_control <- trainControl(method = "cv", number = 10)
# XGBoost Model for Critic score
xgb_model_Critic_score_cv <- train(
Critic_score_category ~ Log_production_budget_adj + PG.13 + R + PG + G +
between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count +
Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime +
Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror +
Main_Mystery + Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,
data = train_data,
method = "xgbTree", # XGBoost model
trControl = train_control
)
## [19:25:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:23] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:23] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:37] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:37] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:59] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:59] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:00] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:00] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:23] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:23] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:37] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:37] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:59] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:59] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:00] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:00] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:23] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:23] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:37] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:37] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:59] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:59] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:00] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:00] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:23] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:23] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:37] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:37] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:37] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:37] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
# Random Forest Model for IMDb Rating
rf_model_IMDB_Category_cv <- train(
IMDB_Category ~ Log_production_budget_adj + PG.13 + R + PG + G +
between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count +
Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime +
Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror +
Main_Mystery + Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,
data = train_data,
method = "rf", # Random Forest method
trControl = train_control,
tuneGrid = expand.grid(
mtry = c(2, 4, 6, 8) # Number of features randomly selected at each split
)
)
# XGBoost Model for Worldwide Gross Adj Category
xgb_model_Gross_Category_cv <- train(
Log_Worldwide_Gross_Category ~ Log_production_budget_adj + PG.13 + R + PG + G +
between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count +
Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime +
Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror +
Main_Mystery + Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,
data = train_data,
method = "xgbTree", # XGBoost model
trControl = train_control
)
## [19:29:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:59] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:59] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:59] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:59] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:00] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:00] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:23] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:23] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:37] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:37] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:59] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:59] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:00] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:00] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:00] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:00] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:23] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:23] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:37] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:37] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:59] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:59] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:00] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:00] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:23] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:23] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:37] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:37] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:59] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:59] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:00] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:00] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:23] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:23] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
# Check the results of cross-validation
print(xgb_model_Critic_score_cv)
## eXtreme Gradient Boosting
##
## 828 samples
## 26 predictor
## 3 classes: 'Moderate', 'Popular', 'Unpopular'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 745, 745, 746, 744, 744, 745, ...
## Resampling results across tuning parameters:
##
## eta max_depth colsample_bytree subsample nrounds Accuracy Kappa
## 0.3 1 0.6 0.50 50 0.4382869 0.1400253
## 0.3 1 0.6 0.50 100 0.4275013 0.1280986
## 0.3 1 0.6 0.50 150 0.4178473 0.1131868
## 0.3 1 0.6 0.75 50 0.4335978 0.1335159
## 0.3 1 0.6 0.75 100 0.4407984 0.1453639
## 0.3 1 0.6 0.75 150 0.4287215 0.1287708
## 0.3 1 0.6 1.00 50 0.4347890 0.1346220
## 0.3 1 0.6 1.00 100 0.4299697 0.1289202
## 0.3 1 0.6 1.00 150 0.4324091 0.1334236
## 0.3 1 0.8 0.50 50 0.4372280 0.1389427
## 0.3 1 0.8 0.50 100 0.4346718 0.1376657
## 0.3 1 0.8 0.50 150 0.4312186 0.1329044
## 0.3 1 0.8 0.75 50 0.4431790 0.1484338
## 0.3 1 0.8 0.75 100 0.4348621 0.1380564
## 0.3 1 0.8 0.75 150 0.4408285 0.1476388
## 0.3 1 0.8 1.00 50 0.4360085 0.1355260
## 0.3 1 0.8 1.00 100 0.4275604 0.1247945
## 0.3 1 0.8 1.00 150 0.4384919 0.1423959
## 0.3 2 0.6 0.50 50 0.4454882 0.1553027
## 0.3 2 0.6 0.50 100 0.4298403 0.1351383
## 0.3 2 0.6 0.50 150 0.4407861 0.1518995
## 0.3 2 0.6 0.75 50 0.4334106 0.1364740
## 0.3 2 0.6 0.75 100 0.4334537 0.1381613
## 0.3 2 0.6 0.75 150 0.4335117 0.1400984
## 0.3 2 0.6 1.00 50 0.4371993 0.1420571
## 0.3 2 0.6 1.00 100 0.4335261 0.1403270
## 0.3 2 0.6 1.00 150 0.4443716 0.1579033
## 0.3 2 0.8 0.50 50 0.4455033 0.1547767
## 0.3 2 0.8 0.50 100 0.4588021 0.1766237
## 0.3 2 0.8 0.50 150 0.4322646 0.1368073
## 0.3 2 0.8 0.75 50 0.4335124 0.1376986
## 0.3 2 0.8 0.75 100 0.4238725 0.1248068
## 0.3 2 0.8 0.75 150 0.4408439 0.1520686
## 0.3 2 0.8 1.00 50 0.4479997 0.1578043
## 0.3 2 0.8 1.00 100 0.4346725 0.1414842
## 0.3 2 0.8 1.00 150 0.4504674 0.1658787
## 0.3 3 0.6 0.50 50 0.4433123 0.1541815
## 0.3 3 0.6 0.50 100 0.4444590 0.1568026
## 0.3 3 0.6 0.50 150 0.4359662 0.1461649
## 0.3 3 0.6 0.75 50 0.4588591 0.1768158
## 0.3 3 0.6 0.75 100 0.4456198 0.1600976
## 0.3 3 0.6 0.75 150 0.4396111 0.1511199
## 0.3 3 0.6 1.00 50 0.4359508 0.1427112
## 0.3 3 0.6 1.00 100 0.4385059 0.1480578
## 0.3 3 0.6 1.00 150 0.4372284 0.1477963
## 0.3 3 0.8 0.50 50 0.4201709 0.1209373
## 0.3 3 0.8 0.50 100 0.4311332 0.1382388
## 0.3 3 0.8 0.50 150 0.4360246 0.1468356
## 0.3 3 0.8 0.75 50 0.4383618 0.1465379
## 0.3 3 0.8 0.75 100 0.4421082 0.1543454
## 0.3 3 0.8 0.75 150 0.4457667 0.1613499
## 0.3 3 0.8 1.00 50 0.4468239 0.1586677
## 0.3 3 0.8 1.00 100 0.4456044 0.1597067
## 0.3 3 0.8 1.00 150 0.4384181 0.1505586
## 0.4 1 0.6 0.50 50 0.4383755 0.1408434
## 0.4 1 0.6 0.50 100 0.4360963 0.1408669
## 0.4 1 0.6 0.50 150 0.4311161 0.1342784
## 0.4 1 0.6 0.75 50 0.4468529 0.1557941
## 0.4 1 0.6 0.75 100 0.4334239 0.1354584
## 0.4 1 0.6 0.75 150 0.4286914 0.1292268
## 0.4 1 0.6 1.00 50 0.4323650 0.1316415
## 0.4 1 0.6 1.00 100 0.4372287 0.1408279
## 0.4 1 0.6 1.00 150 0.4348334 0.1382558
## 0.4 1 0.8 0.50 50 0.4648398 0.1825249
## 0.4 1 0.8 0.50 100 0.4468680 0.1553975
## 0.4 1 0.8 0.50 150 0.4442838 0.1538679
## 0.4 1 0.8 0.75 50 0.4396376 0.1434181
## 0.4 1 0.8 0.75 100 0.4250483 0.1243819
## 0.4 1 0.8 0.75 150 0.4298969 0.1304517
## 0.4 1 0.8 1.00 50 0.4384041 0.1404161
## 0.4 1 0.8 1.00 100 0.4409310 0.1462060
## 0.4 1 0.8 1.00 150 0.4312476 0.1325871
## 0.4 2 0.6 0.50 50 0.4165859 0.1130498
## 0.4 2 0.6 0.50 100 0.4321922 0.1399234
## 0.4 2 0.6 0.50 150 0.4359368 0.1449768
## 0.4 2 0.6 0.75 50 0.4430929 0.1529243
## 0.4 2 0.6 0.75 100 0.4382880 0.1485575
## 0.4 2 0.6 0.75 150 0.4358486 0.1448665
## 0.4 2 0.6 1.00 50 0.4514840 0.1647786
## 0.4 2 0.6 1.00 100 0.4382152 0.1469995
## 0.4 2 0.6 1.00 150 0.4394788 0.1502305
## 0.4 2 0.8 0.50 50 0.4432108 0.1544161
## 0.4 2 0.8 0.50 100 0.4322786 0.1402814
## 0.4 2 0.8 0.50 150 0.4444447 0.1590664
## 0.4 2 0.8 0.75 50 0.4358056 0.1413575
## 0.4 2 0.8 0.75 100 0.4478842 0.1621025
## 0.4 2 0.8 0.75 150 0.4479573 0.1637495
## 0.4 2 0.8 1.00 50 0.4382583 0.1456164
## 0.4 2 0.8 1.00 100 0.4528053 0.1706389
## 0.4 2 0.8 1.00 150 0.4516439 0.1684467
## 0.4 3 0.6 0.50 50 0.4285770 0.1335066
## 0.4 3 0.6 0.50 100 0.4467816 0.1639615
## 0.4 3 0.6 0.50 150 0.4372291 0.1497626
## 0.4 3 0.6 0.75 50 0.4299715 0.1353763
## 0.4 3 0.6 0.75 100 0.4274884 0.1320303
## 0.4 3 0.6 0.75 150 0.4202584 0.1240469
## 0.4 3 0.6 1.00 50 0.4395232 0.1485848
## 0.4 3 0.6 1.00 100 0.4444587 0.1577159
## 0.4 3 0.6 1.00 150 0.4373172 0.1483019
## 0.4 3 0.8 0.50 50 0.4420921 0.1536377
## 0.4 3 0.8 0.50 100 0.4409460 0.1550329
## 0.4 3 0.8 0.50 150 0.4336580 0.1445654
## 0.4 3 0.8 0.75 50 0.4348331 0.1439464
## 0.4 3 0.8 0.75 100 0.4482764 0.1651645
## 0.4 3 0.8 0.75 150 0.4312190 0.1407432
## 0.4 3 0.8 1.00 50 0.4299847 0.1347275
## 0.4 3 0.8 1.00 100 0.4576694 0.1788384
## 0.4 3 0.8 1.00 150 0.4468683 0.1632630
##
## Tuning parameter 'gamma' was held constant at a value of 0
## Tuning
## parameter 'min_child_weight' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were nrounds = 50, max_depth = 1, eta
## = 0.4, gamma = 0, colsample_bytree = 0.8, min_child_weight = 1 and subsample
## = 0.5.
print(xgb_model_Gross_Category_cv)
## eXtreme Gradient Boosting
##
## 828 samples
## 26 predictor
## 3 classes: 'High's', 'Low's', 'Medium'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 746, 745, 744, 745, 745, 746, ...
## Resampling results across tuning parameters:
##
## eta max_depth colsample_bytree subsample nrounds Accuracy Kappa
## 0.3 1 0.6 0.50 50 0.6232459 0.4345190
## 0.3 1 0.6 0.50 100 0.6171480 0.4253202
## 0.3 1 0.6 0.50 150 0.6219824 0.4326543
## 0.3 1 0.6 0.75 50 0.6316800 0.4471956
## 0.3 1 0.6 0.75 100 0.6207485 0.4307392
## 0.3 1 0.6 0.75 150 0.6195297 0.4290073
## 0.3 1 0.6 1.00 50 0.6280362 0.4416780
## 0.3 1 0.6 1.00 100 0.6184554 0.4273351
## 0.3 1 0.6 1.00 150 0.6196315 0.4291602
## 0.3 1 0.8 0.50 50 0.6267733 0.4398282
## 0.3 1 0.8 0.50 100 0.6255828 0.4380131
## 0.3 1 0.8 0.50 150 0.6194562 0.4288990
## 0.3 1 0.8 0.75 50 0.6327834 0.4488104
## 0.3 1 0.8 0.75 100 0.6182664 0.4270410
## 0.3 1 0.8 0.75 150 0.6170753 0.4252832
## 0.3 1 0.8 1.00 50 0.6316510 0.4471600
## 0.3 1 0.8 1.00 100 0.6196458 0.4291615
## 0.3 1 0.8 1.00 150 0.6196168 0.4290899
## 0.3 2 0.6 0.50 50 0.6099342 0.4145862
## 0.3 2 0.6 0.50 100 0.5929488 0.3891966
## 0.3 2 0.6 0.50 150 0.5689245 0.3531501
## 0.3 2 0.6 0.75 50 0.6292417 0.4434909
## 0.3 2 0.6 0.75 100 0.5906696 0.3857802
## 0.3 2 0.6 0.75 150 0.5870415 0.3803804
## 0.3 2 0.6 1.00 50 0.6039678 0.4057811
## 0.3 2 0.6 1.00 100 0.5906553 0.3857620
## 0.3 2 0.6 1.00 150 0.5942267 0.3910807
## 0.3 2 0.8 0.50 50 0.6039244 0.4057016
## 0.3 2 0.8 0.50 100 0.5761530 0.3639944
## 0.3 2 0.8 0.50 150 0.5724665 0.3585985
## 0.3 2 0.8 0.75 50 0.6039531 0.4055833
## 0.3 2 0.8 0.75 100 0.6002372 0.4000583
## 0.3 2 0.8 0.75 150 0.5846749 0.3767240
## 0.3 2 0.8 1.00 50 0.6136357 0.4202235
## 0.3 2 0.8 1.00 100 0.5991044 0.3983476
## 0.3 2 0.8 1.00 150 0.5846602 0.3767562
## 0.3 3 0.6 0.50 50 0.5942855 0.3913507
## 0.3 3 0.6 0.50 100 0.5712911 0.3567411
## 0.3 3 0.6 0.50 150 0.5737742 0.3604833
## 0.3 3 0.6 0.75 50 0.5954746 0.3930137
## 0.3 3 0.6 0.75 100 0.5617246 0.3423867
## 0.3 3 0.6 0.75 150 0.5628427 0.3440744
## 0.3 3 0.6 1.00 50 0.6087731 0.4128443
## 0.3 3 0.6 1.00 100 0.5894512 0.3838086
## 0.3 3 0.6 1.00 150 0.5737591 0.3603860
## 0.3 3 0.8 0.50 50 0.5893928 0.3838024
## 0.3 3 0.8 0.50 100 0.5616805 0.3422001
## 0.3 3 0.8 0.50 150 0.5761684 0.3640110
## 0.3 3 0.8 0.75 50 0.5929352 0.3891028
## 0.3 3 0.8 0.75 100 0.5737004 0.3603453
## 0.3 3 0.8 0.75 150 0.5520273 0.3278311
## 0.3 3 0.8 1.00 50 0.6063487 0.4092710
## 0.3 3 0.8 1.00 100 0.5942991 0.3912471
## 0.3 3 0.8 1.00 150 0.5725683 0.3585048
## 0.4 1 0.6 0.50 50 0.6171631 0.4252934
## 0.4 1 0.6 0.50 100 0.6123725 0.4182555
## 0.4 1 0.6 0.50 150 0.6219827 0.4327167
## 0.4 1 0.6 0.75 50 0.6292267 0.4434681
## 0.4 1 0.6 0.75 100 0.6135185 0.4199619
## 0.4 1 0.6 0.75 150 0.6135045 0.4199378
## 0.4 1 0.6 1.00 50 0.6231879 0.4344420
## 0.4 1 0.6 1.00 100 0.6183973 0.4272799
## 0.4 1 0.6 1.00 150 0.6171925 0.4254782
## 0.4 1 0.8 0.50 50 0.6365140 0.4543527
## 0.4 1 0.8 0.50 100 0.6219383 0.4326291
## 0.4 1 0.8 0.50 150 0.6135192 0.4199813
## 0.4 1 0.8 0.75 50 0.6268891 0.4399829
## 0.4 1 0.8 0.75 100 0.6196308 0.4291595
## 0.4 1 0.8 0.75 150 0.6208062 0.4309143
## 0.4 1 0.8 1.00 50 0.6219974 0.4326345
## 0.4 1 0.8 1.00 100 0.6195874 0.4290171
## 0.4 1 0.8 1.00 150 0.6159876 0.4236622
## 0.4 2 0.6 0.50 50 0.6039244 0.4055916
## 0.4 2 0.6 0.50 100 0.5772722 0.3657056
## 0.4 2 0.6 0.50 150 0.5785641 0.3676178
## 0.4 2 0.6 0.75 50 0.6002221 0.4001511
## 0.4 2 0.6 0.75 100 0.5893777 0.3839679
## 0.4 2 0.6 0.75 150 0.5676756 0.3514776
## 0.4 2 0.6 1.00 50 0.6063487 0.4093232
## 0.4 2 0.6 1.00 100 0.5809149 0.3711871
## 0.4 2 0.6 1.00 150 0.5773145 0.3657784
## 0.4 2 0.8 0.50 50 0.5978118 0.3965031
## 0.4 2 0.8 0.50 100 0.5627395 0.3438580
## 0.4 2 0.8 0.50 150 0.5701436 0.3549773
## 0.4 2 0.8 0.75 50 0.6063033 0.4092393
## 0.4 2 0.8 0.75 100 0.5725669 0.3586612
## 0.4 2 0.8 0.75 150 0.5749493 0.3622271
## 0.4 2 0.8 1.00 50 0.6111971 0.4165542
## 0.4 2 0.8 1.00 100 0.5918167 0.3873950
## 0.4 2 0.8 1.00 150 0.5725687 0.3585066
## 0.4 3 0.6 0.50 50 0.5726114 0.3586418
## 0.4 3 0.6 0.50 100 0.5738032 0.3605582
## 0.4 3 0.6 0.50 150 0.5508959 0.3261357
## 0.4 3 0.6 0.75 50 0.5737301 0.3603060
## 0.4 3 0.6 0.75 100 0.5640625 0.3458413
## 0.4 3 0.6 0.75 150 0.5591992 0.3386795
## 0.4 3 0.6 1.00 50 0.5905539 0.3855412
## 0.4 3 0.6 1.00 100 0.5749493 0.3622197
## 0.4 3 0.6 1.00 150 0.5580374 0.3367264
## 0.4 3 0.8 0.50 50 0.5675731 0.3511786
## 0.4 3 0.8 0.50 100 0.5278560 0.2915479
## 0.4 3 0.8 0.50 150 0.5434896 0.3149551
## 0.4 3 0.8 0.75 50 0.5724228 0.3585771
## 0.4 3 0.8 0.75 100 0.5580220 0.3368919
## 0.4 3 0.8 0.75 150 0.5580227 0.3369028
## 0.4 3 0.8 1.00 50 0.5942848 0.3911500
## 0.4 3 0.8 1.00 100 0.5809153 0.3710749
## 0.4 3 0.8 1.00 150 0.5652663 0.3478006
##
## Tuning parameter 'gamma' was held constant at a value of 0
## Tuning
## parameter 'min_child_weight' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were nrounds = 50, max_depth = 1, eta
## = 0.4, gamma = 0, colsample_bytree = 0.8, min_child_weight = 1 and subsample
## = 0.5.
print(rf_model_IMDB_Category_cv)
## Random Forest
##
## 828 samples
## 26 predictor
## 3 classes: 'Excellent', 'Good', 'Poor'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 745, 745, 745, 745, 746, 746, ...
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.4866735 0.1274336
## 4 0.4842345 0.1622359
## 6 0.4661328 0.1441397
## 8 0.4552160 0.1360659
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.
# Evaluate Critic Score Model
true_critic_scores <- test_data$Critic_score_category
predicted_critic_scores <- predict(xgb_model_Critic_score_cv, newdata = test_data)
confusion_matrix_critic <- confusionMatrix(
factor(predicted_critic_scores, levels = levels(true_critic_scores)),
factor(true_critic_scores)
)
print(confusion_matrix_critic)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Moderate Popular Unpopular
## Moderate 29 13 18
## Popular 39 82 41
## Unpopular 43 25 66
##
## Overall Statistics
##
## Accuracy : 0.4972
## 95% CI : (0.4441, 0.5504)
## No Information Rate : 0.3511
## P-Value [Acc > NIR] : 1.118e-08
##
## Kappa : 0.2403
##
## Mcnemar's Test P-Value : 5.543e-06
##
## Statistics by Class:
##
## Class: Moderate Class: Popular Class: Unpopular
## Sensitivity 0.26126 0.6833 0.5280
## Specificity 0.87347 0.6610 0.7056
## Pos Pred Value 0.48333 0.5062 0.4925
## Neg Pred Value 0.72297 0.8041 0.7342
## Prevalence 0.31180 0.3371 0.3511
## Detection Rate 0.08146 0.2303 0.1854
## Detection Prevalence 0.16854 0.4551 0.3764
## Balanced Accuracy 0.56737 0.6722 0.6168
# Evaluate Gross Category Model
true_gross_category <- test_data$Log_Worldwide_Gross_Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = test_data)
confusion_matrix_gross <- confusionMatrix(
factor(predicted_gross_category, levels = levels(true_gross_category)),
factor(true_gross_category)
)
print(confusion_matrix_gross)
## Confusion Matrix and Statistics
##
## Reference
## Prediction High's Low's Medium
## High's 84 4 30
## Low's 11 78 28
## Medium 28 32 61
##
## Overall Statistics
##
## Accuracy : 0.6264
## 95% CI : (0.5739, 0.6768)
## No Information Rate : 0.3455
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.4396
##
## Mcnemar's Test P-Value : 0.3077
##
## Statistics by Class:
##
## Class: High's Class: Low's Class: Medium
## Sensitivity 0.6829 0.6842 0.5126
## Specificity 0.8541 0.8388 0.7468
## Pos Pred Value 0.7119 0.6667 0.5041
## Neg Pred Value 0.8361 0.8494 0.7532
## Prevalence 0.3455 0.3202 0.3343
## Detection Rate 0.2360 0.2191 0.1713
## Detection Prevalence 0.3315 0.3287 0.3399
## Balanced Accuracy 0.7685 0.7615 0.6297
# Evaluate IMDB Category Model
true_imdb_category <- test_data$IMDB_Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = test_data)
confusion_matrix_imdb <- confusionMatrix(
factor(predicted_imdb_category, levels = levels(true_imdb_category)),
factor(true_imdb_category)
)
print(confusion_matrix_imdb)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Excellent Good Poor
## Excellent 11 15 3
## Good 61 131 97
## Poor 2 17 19
##
## Overall Statistics
##
## Accuracy : 0.4522
## 95% CI : (0.3997, 0.5056)
## No Information Rate : 0.4579
## P-Value [Acc > NIR] : 0.6043
##
## Kappa : 0.0485
##
## Mcnemar's Test P-Value : <2e-16
##
## Statistics by Class:
##
## Class: Excellent Class: Good Class: Poor
## Sensitivity 0.14865 0.8037 0.15966
## Specificity 0.93617 0.1813 0.91983
## Pos Pred Value 0.37931 0.4533 0.50000
## Neg Pred Value 0.80734 0.5224 0.68553
## Prevalence 0.20787 0.4579 0.33427
## Detection Rate 0.03090 0.3680 0.05337
## Detection Prevalence 0.08146 0.8118 0.10674
## Balanced Accuracy 0.54241 0.4925 0.53975
## Cruella 2021, moderate film
# Load required libraries
library(caret)
library(nnet) # For multinomial logistic regression
library(xgboost) # For XGBoost
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(200000000), # Example: Log of budget
PG.13 = 1,
R = 0,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 1,
Summer = 0,
Fall = 0,
Winter = 0,
genre_count = 2,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 1,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Moderate"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Moderate High's Good
## Mank 2020, loss making movie, rating were good
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(25000000), # Example: Log of budget
PG.13 = 0,
R = 1,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 0,
Summer = 0,
Fall = 0,
Winter = 1,
genre_count = 2,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 1,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: Medium"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Popular Medium Good
## Don't worry Darling 2022, Moderate as per rating, high revenue
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(35000000), # Example: Log of budget
PG.13 = 0,
R = 1,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 0,
Summer = 0,
Fall = 1,
Winter = 0,
genre_count = 2,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 1,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: Medium"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Unpopular Medium Good
## Minari 2021, Good in critic and hit revenue
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(2000000), # Example: Log of budget
PG.13 = 1,
R = 0,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 0,
Summer = 0,
Fall = 0,
Winter = 1,
genre_count = 1,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 1,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: Low's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Popular Low's Good
## Cats 2019
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(95000000), # Example: Log of budget
PG.13 = 0,
R = 0,
PG = 1,
G = 0,
between_90_to_135 = 0,
Greater_than_135 = 1,
Less.than.90 = 0,
Spring = 0,
Summer = 0,
Fall = 0,
Winter = 1,
genre_count = 3,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 1
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Poor"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Unpopular High's Poor
# Ford v Ferrari 2019
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(97600000), # Example: Log of budget
PG.13 = 1,
R = 0,
PG = 0,
G = 0,
between_90_to_135 = 0,
Greater_than_135 = 1,
Less.than.90 = 0,
Spring = 0,
Summer = 0,
Fall = 1,
Winter = 0,
genre_count = 3,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 1,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Excellent"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Unpopular High's Excellent
# Alita: Battle Angel 2019
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(170000000), # Example: Log of budget
PG.13 = 1,
R = 0,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 0,
Summer = 0,
Fall = 0,
Winter = 1,
genre_count = 5,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 1,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Unpopular High's Good
# Black Christmas 2019
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(5000000), # Example: Log of budget
PG.13 = 1,
R = 0,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 0,
Summer = 0,
Fall = 0,
Winter = 1,
genre_count = 4,
Main_Action = 0,
Main_Adventure = 1,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: Low's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Popular Low's Good
# Ready or Not 2019
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(6000000), # Example: Log of budget
PG.13 = 0,
R = 1,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 0,
Summer = 1,
Fall = 0,
Winter = 0,
genre_count = 4,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 1,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: Medium"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Popular Medium Good
# Magic Mike's Last Dance 2023
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(45000000), # Example: Log of budget
PG.13 = 0,
R = 1,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 0,
Summer = 0,
Fall = 0,
Winter = 1,
genre_count = 3,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 1,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: Medium"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Popular Medium Good
# Tyler Perry's A Madea Family Funeral 2019
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(20000000), # Example: Log of budget
PG.13 = 1,
R = 0,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 1,
Summer = 0,
Fall = 0,
Winter = 0,
genre_count = 2,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 1,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: Medium"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Poor"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Unpopular Medium Poor
# The little things 2021
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(30000000), # Example: Log of budget
PG.13 = 0,
R = 1,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 0,
Summer = 0,
Fall = 0,
Winter = 1,
genre_count = 3,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 1,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: Medium"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Popular Medium Good
## Those Who Wish Me Dead 2021
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(20000000), # Example: Log of budget
PG.13 = 0,
R = 1,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 1,
Summer = 0,
Fall = 0,
Winter = 0,
genre_count = 2,
Main_Action = 1,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Moderate"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: Low's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Moderate Low's Good
## The batman 2022
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(185000000), # Example: Log of budget
PG.13 = 1,
R = 0,
PG = 0,
G = 0,
between_90_to_135 = 0,
Greater_than_135 = 1,
Less.than.90 = 0,
Spring = 1,
Summer = 0,
Fall = 0,
Winter = 0,
genre_count = 4,
Main_Action = 1,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Unpopular High's Good
## The outfit 2022
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(5000000), # Example: Log of budget
PG.13 = 0,
R = 1,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 1,
Summer = 0,
Fall = 0,
Winter = 0,
genre_count = 4,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 1,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Moderate"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: Low's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Moderate Low's Good
## Call of the wild 2020
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(109000000), # Example: Log of budget
PG.13 = 0,
R = 0,
PG = 1,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 0,
Summer = 0,
Fall = 0,
Winter = 1,
genre_count = 3,
Main_Action = 0,
Main_Adventure = 1,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Moderate"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Poor"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Moderate High's Poor
## Nomadland 2021
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(5000000), # Example: Log of budget
PG.13 = 0,
R = 1,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 0,
Summer = 0,
Fall = 0,
Winter = 1,
genre_count = 1,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 1,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: Low's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Popular Low's Good
## Promising young women 2020
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(10000000), # Example: Log of budget
PG.13 = 0,
R = 1,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 0,
Summer = 0,
Fall = 0,
Winter = 1,
genre_count = 3,
Main_Action =0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 1,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: Low's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Popular Low's Good
## Parasite 2019
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(11000000), # Example: Log of budget
PG.13 = 0,
R = 1,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 1,
Summer = 0,
Fall = 0,
Winter = 0,
genre_count = 2,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 1,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: Low's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Popular Low's Good
## Joker 2019
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(55000000), # Example: Log of budget
PG.13 = 0,
R = 1,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 0,
Summer = 0,
Fall = 1,
Winter = 0,
genre_count = 3,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 1,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: Medium"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Popular Medium Good
## 1917 2019
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(100000000), # Example: Log of budget
PG.13 = 0,
R = 1,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 0,
Summer = 0,
Fall = 0,
Winter = 1,
genre_count = 2,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 1,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Moderate"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Moderate High's Good
## Oppenheimer 2023
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(100000000), # Example: Log of budget
PG.13 = 0,
R = 1,
PG = 0,
G = 0,
between_90_to_135 = 0,
Greater_than_135 = 1,
Less.than.90 = 0,
Spring = 0,
Summer = 1,
Fall = 0,
Winter = 0,
genre_count = 2,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 1,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Excellent"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Popular High's Excellent
## Spider-man No way home 2021
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(200000000), # Example: Log of budget
PG.13 = 1,
R = 0,
PG = 0,
G = 0,
between_90_to_135 = 0,
Greater_than_135 = 1,
Less.than.90 = 0,
Spring = 0,
Summer = 0,
Fall = 0,
Winter = 1,
genre_count = 3,
Main_Action = 1,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Unpopular High's Good
## Top Gun: Maverick 2022
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(170000000), # Example: Log of budget
PG.13 = 1,
R = 0,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 1,
Summer = 0,
Fall = 0,
Winter = 0,
genre_count = 2,
Main_Action = 1,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Unpopular High's Good
## Avatar: The way of Water 2022
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(460000000), # Example: Log of budget
PG.13 = 1,
R = 0,
PG = 0,
G = 0,
between_90_to_135 = 0,
Greater_than_135 = 1,
Less.than.90 = 0,
Spring = 0,
Summer = 0,
Fall = 0,
Winter = 1,
genre_count = 3,
Main_Action = 1,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Unpopular High's Good
## Barbie 2023
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(145000000), # Example: Log of budget
PG.13 = 1,
R = 0,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 0,
Summer = 1,
Fall = 0,
Winter = 0,
genre_count = 3,
Main_Action = 0,
Main_Adventure = 1,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Popular High's Good
## Guardians of Galaxy Vol 3
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(250000000), # Example: Log of budget
PG.13 = 1,
R = 0,
PG = 0,
G = 0,
between_90_to_135 = 0,
Greater_than_135 = 1,
Less.than.90 = 0,
Spring = 1,
Summer = 0,
Fall = 0,
Winter = 0,
genre_count = 3,
Main_Action = 1,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Unpopular High's Good
## Once upon the time in Hollywood 2019
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(95000000), # Example: Log of budget
PG.13 = 0,
R = 1,
PG = 0,
G = 0,
between_90_to_135 = 0,
Greater_than_135 = 1,
Less.than.90 = 0,
Spring = 0,
Summer = 1,
Fall = 0,
Winter = 0,
genre_count = 2,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 1,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Moderate"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Moderate High's Good
## The lighthouse 2019
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(11000000), # Example: Log of budget
PG.13 = 0,
R = 1,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 0,
Summer = 0,
Fall = 1,
Winter = 0,
genre_count = 2,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 1,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: Medium"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Popular Medium Good
## Midsommar 2019
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(9000000), # Example: Log of budget
PG.13 = 0,
R = 1,
PG = 0,
G = 0,
between_90_to_135 = 0,
Greater_than_135 = 1,
Less.than.90 = 0,
Spring = 0,
Summer = 1,
Fall = 0,
Winter = 0,
genre_count = 0,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 1,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: Medium"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Popular Medium Good
## Creed 3 2023
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(75000000), # Example: Log of budget
PG.13 = 1,
R = 0,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 1,
Summer = 0,
Fall = 0,
Winter = 0,
genre_count = 2,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 1,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Unpopular High's Good
## Tenet 2020
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(205000000), # Example: Log of budget
PG.13 = 1,
R = 0,
PG = 0,
G = 0,
between_90_to_135 = 0,
Greater_than_135 = 1,
Less.than.90 = 0,
Spring = 0,
Summer = 0,
Fall = 1,
Winter = 0,
genre_count = 3,
Main_Action = 1,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Popular High's Good
## Dune 2021
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(165000000), # Example: Log of budget
PG.13 = 1,
R = 0,
PG = 0,
G = 0,
between_90_to_135 = 0,
Greater_than_135 = 1,
Less.than.90 = 0,
Spring = 0,
Summer = 0,
Fall = 1,
Winter = 0,
genre_count = 3,
Main_Action = 1,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Unpopular High's Good
## No time to die 2021
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(250000000), # Example: Log of budget
PG.13 = 1,
R = 0,
PG = 0,
G = 0,
between_90_to_135 = 0,
Greater_than_135 = 1,
Less.than.90 = 0,
Spring = 0,
Summer = 0,
Fall = 0,
Winter = 1,
genre_count = 3,
Main_Action = 1,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Unpopular High's Good
## Elvis 2022
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(85000000), # Example: Log of budget
PG.13 = 1,
R = 0,
PG = 0,
G = 0,
between_90_to_135 = 0,
Greater_than_135 = 1,
Less.than.90 = 0,
Spring = 0,
Summer = 1,
Fall = 0,
Winter = 0,
genre_count = 3,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 1,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Excellent"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Popular High's Excellent
## Mission impossible 2023
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(291000000), # Example: Log of budget
PG.13 = 1,
R = 0,
PG = 0,
G = 0,
between_90_to_135 = 0,
Greater_than_135 = 1,
Less.than.90 = 0,
Spring = 0,
Summer = 1,
Fall = 0,
Winter = 0,
genre_count = 3,
Main_Action = 1,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Popular High's Good
## The super Mario Bros 2023
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(100000000), # Example: Log of budget
PG.13 = 0,
R = 0,
PG = 1,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 1,
Summer = 0,
Fall = 0,
Winter = 0,
genre_count = 3,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 1,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Moderate"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Moderate High's Good
## Killers of the moon 2023
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(200000000), # Example: Log of budget
PG.13 = 0,
R = 1,
PG = 0,
G = 0,
between_90_to_135 = 0,
Greater_than_135 = 1,
Less.than.90 = 0,
Spring = 0,
Summer = 0,
Fall = 1,
Winter = 0,
genre_count = 3,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 1,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Popular High's Good
## Doctor Sleep 2019
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(45000000), # Example: Log of budget
PG.13 = 0,
R = 1,
PG = 0,
G = 0,
between_90_to_135 = 0,
Greater_than_135 = 1,
Less.than.90 = 0,
Spring = 0,
Summer = 0,
Fall = 1,
Winter = 0,
genre_count = 3,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 1,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Excellent"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Popular High's Excellent
## The french Dispatch 2021
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(25000000), # Example: Log of budget
PG.13 = 0,
R = 1,
PG = 0,
G = 0,
between_90_to_135 = 0,
Greater_than_135 = 1,
Less.than.90 = 0,
Spring = 0,
Summer = 0,
Fall = 1,
Winter = 0,
genre_count = 3,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 1,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: Medium"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Popular Medium Good
## minions: The rise of Gru 2022
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(85000000), # Example: Log of budget
PG.13 = 0,
R = 0,
PG = 1,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 0,
Summer = 1,
Fall = 0,
Winter = 0,
genre_count = 3,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 1,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Popular High's Good
## Past lives 2023
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(12000000), # Example: Log of budget
PG.13 = 1,
R = 0,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 0,
Summer = 1,
Fall = 0,
Winter = 0,
genre_count = 2,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 1,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: Low's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Popular Low's Good
## The Holdovers 2023
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(13000000), # Example: Log of budget
PG.13 = 0,
R = 1,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 0,
Summer = 0,
Fall = 1,
Winter = 0,
genre_count = 2,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 1,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: Low's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Popular Low's Good
## Jojo Rabbit 2019
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(14000000), # Example: Log of budget
PG.13 = 1,
R = 0,
PG = 0,
G = 0,
between_90_to_135 = 0,
Greater_than_135 = 1,
Less.than.90 = 0,
Spring = 0,
Summer = 0,
Fall = 1,
Winter = 0,
genre_count = 3,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 1,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: Medium"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Popular Medium Good
## Possessor 2020
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(2500000), # Example: Log of budget
PG.13 = 0,
R = 1,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 0,
Summer = 0,
Fall = 1,
Winter = 0,
genre_count = 3,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 1,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: Low's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Popular Low's Good
## Titane 2021
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(6600000), # Example: Log of budget
PG.13 = 0,
R = 1,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 0,
Summer = 1,
Fall = 0,
Winter = 0,
genre_count = 3,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 1,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: Low's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Popular Low's Good
## The green knight 2021
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(15000000), # Example: Log of budget
PG.13 = 0,
R = 1,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 0,
Summer = 1,
Fall = 0,
Winter = 0,
genre_count = 3,
Main_Action = 0,
Main_Adventure = 1,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: Low's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Popular Low's Good
## Nope 2022
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(68000000), # Example: Log of budget
PG.13 = 0,
R = 1,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 0,
Summer = 1,
Fall = 0,
Winter = 0,
genre_count = 3,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 1,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Unpopular High's Good
## Infinity Pool 2023
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(4500000), # Example: Log of budget
PG.13 = 0,
R = 1,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 0,
Summer = 0,
Fall = 0,
Winter = 1,
genre_count = 3,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 1,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: Low's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Popular Low's Good
## Beau is Afraid 2023
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(35000000), # Example: Log of budget
PG.13 = 0,
R = 1,
PG = 0,
G = 0,
between_90_to_135 = 0,
Greater_than_135 = 1,
Less.than.90 = 0,
Spring = 1,
Summer = 0,
Fall = 0,
Winter = 0,
genre_count = 3,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 1,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: Medium"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Popular Medium Good
## Dolittle 2020
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(175000000), # Example: Log of budget
PG.13 = 0,
R = 0,
PG = 1,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 0,
Summer = 0,
Fall = 0,
Winter = 1,
genre_count = 3,
Main_Action = 0,
Main_Adventure = 1,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Poor"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Unpopular High's Poor
## Morbuis 2022
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(75000000), # Example: Log of budget
PG.13 = 1,
R = 0,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 1,
Summer = 0,
Fall = 0,
Winter = 0,
genre_count = 3,
Main_Action = 1,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Unpopular High's Good
## Firestarter 2022
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(12000000), # Example: Log of budget
PG.13 = 0,
R = 1,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 1,
Summer = 0,
Fall = 0,
Winter = 0,
genre_count = 3,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 1,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: Medium"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Unpopular Medium Good
## The 355 2022
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(75000000), # Example: Log of budget
PG.13 = 1,
R = 0,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 0,
Summer = 0,
Fall = 0,
Winter = 1,
genre_count = 2,
Main_Action = 1,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Unpopular High's Good
## Winnie the pooh: Blood and Honey
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(100000), # Example: Log of budget
PG.13 = 0,
R = 1,
PG = 0,
G = 0,
between_90_to_135 = 0,
Greater_than_135 = 0,
Less.than.90 = 1,
Spring = 0,
Summer = 0,
Fall = 0,
Winter = 1,
genre_count = 2,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 1,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: Low's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Poor"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Popular Low's Poor
## Us 2019
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(20000000), # Example: Log of budget
PG.13 = 0,
R = 1,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 1,
Summer = 0,
Fall = 0,
Winter = 0,
genre_count = 3,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 1,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: Medium"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Unpopular Medium Good
## Godzilla: King of the Monsters
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(170000000), # Example: Log of budget
PG.13 = 1,
R = 0,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 1,
Summer = 0,
Fall = 0,
Winter = 0,
genre_count = 3,
Main_Action = 1,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Unpopular High's Good
## Capain Marvel 2019
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(152000000), # Example: Log of budget
PG.13 = 1,
R = 0,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 1,
Summer = 0,
Fall = 0,
Winter = 0,
genre_count = 3,
Main_Action = 1,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Unpopular High's Good
## Ad Astra 2019
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(87500000), # Example: Log of budget
PG.13 = 1,
R = 0,
PG = 0,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 0,
Summer = 0,
Fall = 1,
Winter = 0,
genre_count = 3,
Main_Action = 0,
Main_Adventure = 1,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Unpopular High's Good
## The Farewell
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(250300), # Example: Log of budget
PG.13 = 0,
R = 0,
PG = 1,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 0,
Summer = 1,
Fall = 0,
Winter = 0,
genre_count = 2,
Main_Action = 0,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 1,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: Low's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Popular Low's Good
## Avengers: Endgame 2019
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(356000000), # Example: Log of budget
PG.13 = 1,
R = 0,
PG = 0,
G = 0,
between_90_to_135 = 0,
Greater_than_135 = 1,
Less.than.90 = 0,
Spring = 1,
Summer = 0,
Fall = 0,
Winter = 0,
genre_count = 3,
Main_Action = 1,
Main_Adventure = 0,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Unpopular High's Good
## The Lion King 2019
# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
Log_production_budget_adj = log(260000000), # Example: Log of budget
PG.13 = 0,
R = 0,
PG = 1,
G = 0,
between_90_to_135 = 1,
Greater_than_135 = 0,
Less.than.90 = 0,
Spring = 0,
Summer = 1,
Fall = 0,
Winter = 0,
genre_count = 3,
Main_Action = 0,
Main_Adventure = 1,
Main_Animation = 0,
Main_Comedy = 0,
Main_Crime = 0,
Main_Documentary = 0,
Main_Drama = 0,
Main_Family = 0,
Main_Fantasy = 0,
Main_Horror = 0,
Main_Mystery = 0,
Main_History = 0,
Main_Romance = 0,
Main_Science_Fiction = 0,
Main_Thriller = 0,
Other_Genres = 0
)
# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category: Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category: High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category: Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
Critic_score_Prediction = predicted_Critic_score,
Gross_Category_Prediction = predicted_gross_category,
IMDB_Category_Prediction = predicted_imdb_category
)
# Print the results to console
print(predictions)
## Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1 Popular High's Good
# # Optionally save the results to a CSV
# write.csv(data, "fdata.csv", row.names = FALSE)
#